library(tswge)
library(ggplot2)
library(data.table)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(formattable)
library(tidyr)
library(stringr)
library(vars)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:formattable':
##
## area
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
##
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
##
## boundary
## Loading required package: urca
## Loading required package: lmtest
library(nnfor)
## Loading required package: forecast
# Pin the working directory so the relative reads below (Monthly_GoldPrice.csv,
# usd_inr_exc_rt.csv) resolve.
# NOTE(review): setwd() with a machine-specific path is brittle in scripts --
# prefer building paths with here::here() (already called below) instead.
setwd("~/Documents/SMU/Assignments/TimeSeries/final_proj/goldprices")
here::here()
## [1] "/Users/tanviarora/Documents/SMU/Assignments/TimeSeries"
"
define function rollingwindow
Function creates multiple windows of distinct test dataset of size=nahead and returns an average ASE of all the windows
Inputs :
input - time-series data
modelparams - For ARUMA provide a list of inputs (phi,theta,s,d)
For ARMA provide a list of inputs (phi,theta,s=0,d=0)
nahead - how many values to predict
"
## [1] "\ndefine function rollingwindow\nFunction creates multiple windows of distinct test dataset of size=nahead and returns an average ASE of all the windows\nInputs :\n input - time-series data\n\n modelparams - For ARUMA provide a list of inputs (phi,theta,s,d) \n For ARMA provide a list of inputs (phi,theta,s=0,d=0)\n nahead - how many values to predict\n\n"
distrollingwindow <- function(input, modelparams, nahead, modelname) {
  # Rolling-window ASE for an ARMA/ARUMA model over distinct (non-overlapping)
  # test windows of size `nahead`.
  #   input       - numeric time-series vector
  #   modelparams - list(phi, theta, s, d); use s = 0, d = 0 for a plain ARMA
  #   nahead      - forecast horizon (also the window and step size)
  #   modelname   - label used in the plot title
  # Side effects: prints diagnostics and a prediction-vs-actuals ggplot.
  # Returns: the mean ASE across all windows.
  horizon <- nahead
  step_size <- nahead
  window <- nahead

  # Enumerate window boundaries: window i trains on observations
  # start_ind[i]:end_ind[i] and is scored on the following `nahead` points.
  start_index <- 1
  anchor_index <- start_index + window - 1
  end_index <- anchor_index + horizon
  start_ind <- numeric()
  end_ind <- numeric()
  i <- 1
  while (end_index < length(input)) {
    start_ind[i] <- start_index
    end_ind[i] <- end_index
    start_index <- start_index + step_size
    anchor_index <- start_index + window - 1
    end_index <- anchor_index + horizon
    i <- i + 1
  }
  print("start_index")
  print(start_ind)
  print("end_index")
  print(end_ind)

  ASEHolder <- numeric()
  # Preallocate; positions before the first test window stay NA (ggplot drops
  # them with a "Removed n rows" message).
  predHolder <- rep(NA_real_, length(input))
  j <- end_ind[1] + 1
  for (i in seq_along(start_ind)) {
    # lastn = TRUE makes fore.aruma.wge forecast the final `nahead` points of
    # the supplied slice, i.e. observations (end_ind[i]+1):(end_ind[i]+nahead).
    faruma <- fore.aruma.wge(input[start_ind[i]:(end_ind[i] + horizon)],
                             phi = modelparams$phi, theta = modelparams$theta,
                             d = modelparams$d, s = modelparams$s,
                             n.ahead = nahead, lastn = TRUE, plot = FALSE)
    ASEHolder[i] <- mean((input[(end_ind[i] + 1):(end_ind[i] + nahead)] - faruma$f)^2)
    # Vectorised slice assignment replaces the original element-by-element loop.
    predHolder[j:(j + nahead - 1)] <- faruma$f
    j <- j + nahead
  }
  print("predHolder")
  print(predHolder)

  output <- data.frame(timeseq = seq_along(predHolder), pred = predHolder)
  toplot <- cbind(output, actuals = input)
  print(head(toplot))

  print("ASE of each window")
  print(ASEHolder)
  print("WindowedASE")
  WindowedASE <- mean(ASEHolder)
  SDWindowedASE <- sd(ASEHolder)
  print(WindowedASE)
  print(SDWindowedASE)

  # Map colour to series labels rather than literal colour names: the original
  # mapped the strings "blue"/"red" and the alphabetised factor levels made
  # scale_color_manual swap the colours; `label` is not a line aesthetic and
  # only produced warnings.
  p <- ggplot(toplot, aes(timeseq)) +
    geom_line(aes(y = pred, color = "predictions")) +
    geom_line(aes(y = actuals, color = "actuals")) +
    theme(panel.background = element_rect(fill = "white", colour = "lightgrey"),
          panel.grid.major.x = element_line(size = 1, colour = "#DAE8FC"),
          panel.grid.minor = element_line(size = 0.25, linetype = 'solid', colour = "grey"),
          plot.title = element_text(size = 18, hjust = 0.5),
          legend.position = "right",
          legend.text = element_text(color = "black")) +
    scale_x_continuous(name = "Rolling Windows", breaks = seq(0, 492, 6)) +
    ylab("actual vs predictions") +
    ggtitle(paste("Actual values vs Predictions for ", modelname)) +
    scale_color_manual(name = "Legend",
                       values = c(predictions = "blue", actuals = "red"))
  print(p)
  return(WindowedASE)
}
"
define function rollingwindow
Function creates multiple windows of distinct test dataset of size=nahead and returns an average ASE of all the windows
Inputs :
input - time-series data
modelparams - For ARUMA provide a list of inputs (phi,theta,s,d)
For ARMA provide a list of inputs (phi,theta,s=0,d=0)
nahead - how many values to predict
"
## [1] "\ndefine function rollingwindow\nFunction creates multiple windows of distinct test dataset of size=nahead and returns an average ASE of all the windows\nInputs :\n input - time-series data\n\n modelparams - For ARUMA provide a list of inputs (phi,theta,s,d) \n For ARMA provide a list of inputs (phi,theta,s=0,d=0)\n nahead - how many values to predict\n\n"
distrollingwindow_v2 <- function(input, modelparams, nahead, modelname, windowsize, modeltype) {
  # Generalised rolling-window ASE over distinct test windows of size `nahead`.
  #   input      - numeric vector (Univariate) or data.frame with columns
  #                Indian.rupee, exchange_rt, prem_dc_rt (VAR / NN / NN_T)
  #   modelparams- Univariate: list(phi, theta, s, d)
  #                VAR:        list(p, type)
  #                NN/NN_T:    list(modelfit[, modelfit_exchrt, modelfit_premdc])
  #   nahead     - forecast horizon (also the step size)
  #   modelname  - label for the plot title
  #   windowsize - training-window size used when laying out the windows
  #   modeltype  - one of "Univariate", "VAR", "NN", "NN_T", "Ensemble"
  # Side effects: prints diagnostics and a prediction-vs-actuals ggplot.
  # Returns: list(WASE = mean windowed ASE, predictions = prediction vector).
  horizon <- nahead
  step_size <- nahead
  window <- windowsize

  if (modeltype == "Univariate") {
    inputsize <- length(input)
  } else {
    inputsize <- nrow(input)
  }
  print("inputsize")
  print(inputsize)

  # Enumerate window boundaries (same layout logic as distrollingwindow).
  start_index <- 1
  anchor_index <- start_index + window - 1
  end_index <- anchor_index + horizon
  start_ind <- numeric()
  end_ind <- numeric()
  ind <- 1
  while (end_index < inputsize) {
    start_ind[ind] <- start_index
    end_ind[ind] <- end_index
    start_index <- start_index + step_size
    anchor_index <- start_index + window - 1
    end_index <- anchor_index + horizon
    ind <- ind + 1
  }
  print("start_index")
  print(start_ind)
  print("end_index")
  print(end_ind)

  ASEHolder <- numeric()
  predHolder <- rep(NA_real_, inputsize)  # NA before the first test window
  print(length(predHolder))
  j <- end_ind[1] + 1

  # Local helper: align predictions with actuals for plotting (this block was
  # duplicated verbatim in every branch of the original).
  compile_results <- function(actuals_vec) {
    out <- data.frame(timeseq = seq_along(predHolder), pred = predHolder)
    tp <- cbind(out, actuals = actuals_vec)
    print(tail(tp))
    tp
  }

  if (modeltype == "Univariate") {
    for (i in seq_along(start_ind)) {
      # lastn = TRUE scores the final `nahead` points of the supplied slice.
      faruma <- fore.aruma.wge(input[start_ind[i]:(end_ind[i] + horizon)],
                               phi = modelparams$phi, theta = modelparams$theta,
                               d = modelparams$d, s = modelparams$s,
                               n.ahead = nahead, lastn = TRUE, plot = FALSE)
      ASEHolder[i] <- mean((input[(end_ind[i] + 1):(end_ind[i] + nahead)] - faruma$f)^2)
      print(faruma$f)
      predHolder[j:(j + nahead - 1)] <- faruma$f
      j <- j + nahead
    }
    print("predHolder")
    print(predHolder)
    toplot <- compile_results(input)
  } else if (modeltype == "VAR") {
    print("inside VAR")
    for (i in seq_along(start_ind)) {
      train_slice <- input[start_ind[i]:end_ind[i], ]
      var_fit <- VAR(train_slice, p = modelparams$p, type = modelparams$type)
      var_preds <- predict(var_fit, n.ahead = nahead)
      fore_inr <- var_preds$fcst$Indian.rupee[, 1]  # point forecasts
      actual_inr <- input$Indian.rupee[(end_ind[i] + 1):(end_ind[i] + nahead)]
      ASEHolder[i] <- mean((fore_inr - actual_inr)^2)
      predHolder[j:(j + nahead - 1)] <- fore_inr
      j <- j + nahead
    }
    print("finished VAR Rolling Window. Compiling results")
    print("predHolder")
    print(predHolder)
    toplot <- compile_results(input$Indian.rupee)
  } else if (modeltype == "NN") {
    print("inside NN")
    for (i in seq_along(start_ind)) {
      nn_train <- input[start_ind[i]:end_ind[i], ]
      # Test slice sized by nahead (the original hard-coded +6 here, which
      # silently broke for any other horizon).
      nn_test <- input[(end_ind[i] + 1):(end_ind[i] + nahead), ]
      nn_train_ts <- ts(nn_train$Indian.rupee)
      set.seed(7)
      # Forecast each exogenous regressor first, then feed the extended
      # regressor series into the main model's forecast.
      fore_excrt <- forecast(modelparams$modelfit_exchrt,
                             y = ts(nn_train$exchange_rt), h = nahead)
      fore_premdc <- forecast(modelparams$modelfit_premdc,
                              y = ts(nn_train$prem_dc_rt), h = nahead)
      xreg_extended <- data.frame(
        exchange_rt = ts(c(nn_train$exchange_rt, fore_excrt$mean)),
        prem_dc_rt = ts(c(nn_train$prem_dc_rt, fore_premdc$mean)))
      fore_reg <- forecast(modelparams$modelfit, y = nn_train_ts,
                           xreg = xreg_extended, h = nahead)
      ASEHolder[i] <- mean((nn_test$Indian.rupee - fore_reg$mean)^2)
      predHolder[j:(j + nahead - 1)] <- as.numeric(fore_reg$mean)
      j <- j + nahead
    }
    print("finished NN Rolling Window. Compiling results")
    print("predHolder")
    toplot <- compile_results(input$Indian.rupee)
  } else if (modeltype == "NN_T") {
    print("inside NN with only Time as regressor")
    for (i in seq_along(start_ind)) {
      nn_train <- input[start_ind[i]:end_ind[i], ]
      nn_test <- input[(end_ind[i] + 1):(end_ind[i] + nahead), ]  # was +6
      nn_train_ts <- ts(nn_train$Indian.rupee)
      set.seed(7)
      fore_reg <- forecast(modelparams$modelfit, y = nn_train_ts, h = nahead)
      ASEHolder[i] <- mean((nn_test$Indian.rupee - fore_reg$mean)^2)
      predHolder[j:(j + nahead - 1)] <- as.numeric(fore_reg$mean)
      j <- j + nahead
    }
    print("finished NN_T Rolling Window. Compiling results")
    print("predHolder")
    toplot <- compile_results(input$Indian.rupee)
  } else if (modeltype == "Ensemble") {
    print("inside Ensemble")
    # NOTE(review): no forecasts are produced in this branch, so predHolder
    # stays all-NA and ASEHolder stays empty (WASE evaluates to NaN) --
    # presumably the caller combines predictions elsewhere; confirm.
    toplot <- compile_results(input$Indian.rupee)
  }

  print(ASEHolder)
  print("WindowedASE")
  WindowedASE <- mean(ASEHolder)
  SDWindowedASE <- sd(ASEHolder)
  print(WindowedASE)
  print("std deviation of Windowed ASE")
  print(SDWindowedASE)

  # Colour mapped to series labels so scale_color_manual assigns the intended
  # colours (the original's literal "blue"/"red" levels got swapped).
  p <- ggplot(toplot, aes(timeseq)) +
    geom_line(aes(y = pred, color = "predictions")) +
    geom_line(aes(y = actuals, color = "actuals")) +
    theme(panel.background = element_rect(fill = "white", colour = "lightgrey"),
          panel.grid.major.x = element_line(size = 1, colour = "#DAE8FC"),
          panel.grid.minor = element_line(size = 0.25, linetype = 'solid', colour = "grey"),
          plot.title = element_text(size = 18, hjust = 0.5),
          legend.position = "right",
          legend.text = element_text(color = "black")) +
    scale_x_continuous(name = "Rolling Windows", breaks = seq(0, 92, 6)) +
    ylab("actual vs predictions") +
    ggtitle(paste("Actual values vs Predictions for ", modelname)) +
    scale_color_manual(name = "Legend",
                       values = c(predictions = "blue", actuals = "red"))
  print(p)
  return(list(WASE = WindowedASE, predictions = predHolder))
}
Gold Price data from 12/31/1978 to 02/28/2020 ( 40+ years Monthly Average of Gold Prices)
Key Currencies -
1- US.dollar (USD)
2- Euro(EUR)
3- Japanese.yen ( JPY)
4- Pound Sterling ( GBP)
Major Consumer Countries -
1- Indian.rupee ( INR)
2- Chinese.renmimbi ( CNY)
3- US.dollar(USD)
4- Turkish.lira ( TRY)
5- Saudi.riyal ( SAR)
Major Producer Countries -
1- US.dollar ( USD)
2- South.African.rand ( ZAR)
3- Chinese.renmimbi ( CNY)
4- Canadian.dollar (CAD)
5- Australian.dollar ( AUD)
# Build the absolute path to the monthly gold-price CSV in the working dir.
sourcefile <- paste(getwd(), "/Monthly_GoldPrice.csv", sep = '')
# NOTE(review): read.csv2 defaults to dec = ","; combined with sep = "," every
# numeric column arrives as character (values also carry thousands commas), so
# the columns are cleaned with gsub()/as.double() further below.
gold_price=read.csv2(file=sourcefile,sep=",",header=TRUE, stringsAsFactors = FALSE)
# Sanity-check the date range: monthly observations, Dec-1978 through Feb-2020.
head(gold_price$Name)
## [1] "12/31/78" "1/31/79" "2/28/79" "3/30/79" "4/30/79" "5/31/79"
tail(gold_price$Name)
## [1] "9/30/19" "10/31/19" "11/29/19" "12/31/19" "1/31/20" "2/28/20"
dim(gold_price)
## [1] 495 25
str(gold_price)
## 'data.frame': 495 obs. of 25 variables:
## $ Name : chr "12/31/78" "1/31/79" "2/28/79" "3/30/79" ...
## $ US.dollar : chr "207.8" "227.3" "245.7" "242.1" ...
## $ Euro : chr "129.2" "139.5" "151.3" "149.3" ...
## $ Japanese.yen : chr "" "44,853.1" "49,213.2" "49,989.4" ...
## $ Pound.sterling : chr "104.6" "113.3" "122.5" "118.8" ...
## $ Canadian.dollar : chr "" "269.5" "293.2" "284.2" ...
## $ Swiss.franc : chr "" "378.8" "410.6" "407.3" ...
## $ Indian.rupee : chr "" "1,852.8" "2,010.4" "1,974.9" ...
## $ Chinese.renmimbi : chr "" "" "" "" ...
## $ US.dollar.1 : chr "207.8" "227.3" "245.7" "242.1" ...
## $ Turkish.lira : chr "" "" "" "" ...
## $ Saudi.riyal : chr "" "754.1" "822.3" "810.1" ...
## $ Indonesian.rupiah : chr "" "141,796.4" "153,099.6" "151,068.5" ...
## $ UAE.dirham : chr "" "869.4" "939.1" "926.2" ...
## $ Thai.baht : chr "" "4,582.2" "4,949.5" "4,884.5" ...
## $ Vietnamese.dong : chr "" "" "" "" ...
## $ Egyptian.pound : chr "" "" "" "" ...
## $ Korean.won : chr "" "111,186.6" "118,284.0" "116,507.1" ...
## $ Euro.1 : chr "129.2" "139.5" "151.3" "149.3" ...
## $ Russian.ruble : chr "" "" "" "" ...
## $ US.dollar.2 : chr "207.8" "227.3" "245.7" "242.1" ...
## $ South.African.rand: chr "" "197.0" "209.9" "204.4" ...
## $ Chinese.renmimbi.1: chr "" "" "" "" ...
## $ Canadian.dollar.1 : chr "" "269.5" "293.2" "284.2" ...
## $ Australian.dollar : chr "" "198.6" "216.7" "215.9" ...
# Keep the date plus three price series (USD, INR, ZAR); strip the thousands
# separators and coerce the character columns to numeric.
gp=data.frame(gold_price$Name,as.double((gsub(",","",gold_price$US.dollar))),as.double((gsub(",","",gold_price$Indian.rupee))),as.double((gsub(",","",gold_price$South.African.rand))))
colnames(gp)=c("Month_end_dt","US.dollar","Indian.rupee","SouthAfrican.rand")
dim(gp)
## [1] 495 4
str(gp)
## 'data.frame': 495 obs. of 4 variables:
## $ Month_end_dt : Factor w/ 495 levels "1/29/10","1/29/16",..: 150 29 190 219 275 318 342 399 440 459 ...
## $ US.dollar : num 208 227 246 242 239 ...
## $ Indian.rupee : num NA 1853 2010 1975 1961 ...
## $ SouthAfrican.rand: num NA 197 210 204 203 ...
summary(gp)
## Month_end_dt US.dollar Indian.rupee SouthAfrican.rand
## 1/29/10: 1 Min. : 207.8 Min. : 1853 Min. : 197.0
## 1/29/16: 1 1st Qu.: 349.5 1st Qu.: 6132 1st Qu.: 968.7
## 1/29/82: 1 Median : 409.4 Median : 12653 Median : 1759.7
## 1/29/88: 1 Mean : 653.8 Mean : 29577 Mean : 5158.9
## 1/29/93: 1 3rd Qu.:1055.8 3rd Qu.: 50190 3rd Qu.: 8440.1
## 1/29/99: 1 Max. :1771.9 Max. :114200 Max. :24012.2
## (Other):489 NA's :1 NA's :1
# Locate missing values: only the first row (Dec-1978) lacks INR/ZAR prices.
which(is.na(gp$US.dollar))
## integer(0)
which(is.na(gp$Indian.rupee))
## [1] 1
which(is.na(gp$SouthAfrican.rand))
## [1] 1
# Drop that first row so all three series are complete (495 -> 494 rows).
gp=gp[2:nrow(gp),]
dim(gp)
## [1] 494 4
summary(gp)
## Month_end_dt US.dollar Indian.rupee SouthAfrican.rand
## 1/29/10: 1 Min. : 227.3 Min. : 1853 Min. : 197.0
## 1/29/16: 1 1st Qu.: 350.5 1st Qu.: 6132 1st Qu.: 968.7
## 1/29/82: 1 Median : 409.8 Median : 12653 Median : 1759.7
## 1/29/88: 1 Mean : 654.7 Mean : 29577 Mean : 5158.9
## 1/29/93: 1 3rd Qu.:1062.0 3rd Qu.: 50190 3rd Qu.: 8440.1
## 1/29/99: 1 Max. :1771.9 Max. :114200 Max. :24012.2
## (Other):488
head(gp$Month_end_dt)
## [1] 1/31/79 2/28/79 3/30/79 4/30/79 5/31/79 6/29/79
## 495 Levels: 1/29/10 1/29/16 1/29/82 1/29/88 1/29/93 1/29/99 1/30/04 ... 9/30/99
tail(gp$Month_end_dt)
## [1] 9/30/19 10/31/19 11/29/19 12/31/19 1/31/20 2/28/20
## 495 Levels: 1/29/10 1/29/16 1/29/82 1/29/88 1/29/93 1/29/99 1/30/04 ... 9/30/99
# Realization / ACF / spectral-density plots for each currency's gold price.
plotts.sample.wge(gp$US.dollar)
plotts.sample.wge(gp$Indian.rupee)
plotts.sample.wge(gp$SouthAfrican.rand)
From the plots above, we can see that the realizations look slightly different for gold prices in the 3 different currencies. It looks like currency exchange rates could be driving the differences. We will evaluate this relationship later.
For our initial analysis, we will pick up Gold Prices in US.dollar
# Compare ACF behaviour across the two halves of the series (stationarity
# condition 3: autocorrelation should not depend on time).
plotts.sample.wge(gp$US.dollar)
par(mfrow = c(2,1))
# Renamed from `t` (shadowed base::t). BUG FIX: the original indexed with
# gp$US.dollar[1:t/2], which parses as (1:t)/2 and yields fractional,
# duplicated indices instead of the first half of the series.
n_obs = length(gp$Month_end_dt)
half = floor(n_obs/2)
acf(gp$US.dollar[1:half], plot=TRUE, col="red")
acf(gp$US.dollar[half:n_obs], plot=TRUE, col="blue")
Stationary/Non-Stationary
Looking for the 3 conditions of stationarity:
Condition 1 : Mean is not dependent on time : From the realization it is very evident that gold prices have soared over time. So this condition is not met.
Condition 2 : Variance is not dependent on time : It is difficult to say from just 1 realization about variance. But if we look at local variances, we do not see changing variability at different times. So we will not comment on this condition for now.
Condition 3 : ACFs are not dependent on time but have a similar pattern between two time windows. When we look at the overall ACF of the entire realization, it looks slowly dampening, but when we compare the first half of the realization with the second half, the first half shows faster dampening of the ACFs than the second half.
If there is a thing as splitting a realization into 2 separate timeseries , stationarity can be revised but based on the Gold Prices in USD for the entire realization, this data is not stationary
ACFs and Spectral Densities
ARMA/ARIMA
Model 1 : ARIMA
Because of the slowly dampening nature of ACFs, there is a possibility of d factor. We will try to difference the data
# First difference (1-B) to remove the trend indicated by slowly damping ACFs.
gp.1=artrans.wge(gp$US.dollar,phi.tr=1)
plotts.sample.wge(gp.1,arlimits=TRUE)
First differenced data looks somewhat like white noise. ACFs are within the limits except for 1 or 2 places. Data seems to be highly variable in the later part of the realization. Next we will try to fit an ARMA model on the first differenced data
aic5.wge(gp.1,p=0:14,q=0:2,type="aic")
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 16 5 0 6.863911
## 17 5 1 6.867910
## 19 6 0 6.867932
## 6 1 2 6.868670
## 34 11 0 6.868794
aic5.wge(gp.1,p=0:14,q=0:2,type="bic")
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of bic
## p q bic
## 2 0 1 6.891523
## 5 1 1 6.895733
## 3 0 2 6.899738
## 4 1 0 6.899992
## 7 2 0 6.901786
aic and aicc returned AR(5) as the favorable model. bic returns a MA(2) as favorable. Also AR(5) is not in the top 5 of the BIC . We will choose to go with AR(5) model
gp.1.ar5.est=est.ar.wge(gp.1,p=5, type='burg')
##
## Coefficients of Original polynomial:
## 0.2317 -0.1124 0.0381 -0.0488 0.1537
##
## Factor Roots Abs Recip System Freq
## 1-0.5295B+0.4991B^2 0.5305+-1.3124i 0.7064 0.1889
## 1-0.6874B 1.4548 0.6874 0.0000
## 1+0.9851B+0.4481B^2 -1.0992+-1.0116i 0.6694 0.3816
##
##
# AR coefficients, white-noise variance estimate, and the series mean used in
# the final model statement below.
gp.1.ar5.est$phi
## [1] 0.23171552 -0.11235863 0.03812619 -0.04876193 0.15371736
gp.1.ar5.est$avar
## [1] 934.0892
mean(gp$US.dollar)
## [1] 654.6814
Final Estimated model can be written as :
(1-B)(1-0.2317B + 0.1124B^2 - 0.0381B^3 + 0.04876B^4 - 0.1537B^5) (X_t - 654.6814) = a_t
sigma_hat2_a = 934.0892
# Forecast the last 6 observations with the ARIMA(5,1,0) model
# (lastn = TRUE holds them out) and compare against the actuals.
gp.1.ar5.fore=fore.aruma.wge(gp$US.dollar,phi=gp.1.ar5.est$phi, d=1 , lastn = TRUE , n.ahead=6 , plot = T, limits=TRUE)
# Actual values
gp$US.dollar[(length(gp$US.dollar)-5):length(gp$US.dollar)]
#Forecasted Values
gp.1.ar5.fore$f
# ASE over the 6-step holdout.
gp.1.ar5.ase=mean((gp$US.dollar[(length(gp$US.dollar)-5):length(gp$US.dollar)]-gp.1.ar5.fore$f)^2)
gp.1.ar5.ase
# Rolling-window ASE (6-step windows) for a more robust out-of-sample view.
nahead=6
modelparams1=list(phi=gp.1.ar5.est$phi,theta=0,s=0,d=1)
wase1_dist=distrollingwindow(gp$US.dollar[1:492],modelparams1,nahead,"ARIMA model")
## Warning: Ignoring unknown aesthetics: label
## Warning: Ignoring unknown aesthetics: label
## Warning: Removed 12 rows containing missing values (geom_path).
## Model appropriateness
# Ljung-Box tests on the AR(5) residuals at two lag depths.
ljung.wge(gp.1.ar5.est$res, p=5)
ljung.wge(gp.1.ar5.est$res, p=5,K=48)
Model 2 : Looking for seasonality First differenced data shows a very prominent peak at freq = 0 and ~ 0.2 , we will check for common factors around s=5 . Next model we will create is an ARIMA with a seasonality factor. If we think of it, Gold prices, like stock prices should not have any seasonality but it can be worth checking.
# Probe possible seasonal factors: additionally difference at lags 4, 5 and 6,
# then inspect the s=5 result (spectral peak near freq ~0.2 suggested s~5).
gp.1.s4=artrans.wge(gp.1,phi=c(rep(0,3),1))
gp.1.s5=artrans.wge(gp.1,phi=c(rep(0,4),1))
gp.1.s6=artrans.wge(gp.1,phi=c(rep(0,5),1))
plotts.sample.wge(gp.1.s5,arlimits=T)
## $autplt
## [1] 1.0000000000 0.1995363714 -0.0699781376 0.0361575270 -0.1043240950
## [6] -0.4199439918 -0.1128815121 0.0206826077 -0.0300527792 -0.0378842232
## [11] -0.0853090096 0.0621505091 0.0287742274 0.0041673611 0.0449200738
## [16] 0.0234217202 0.0115106718 0.0005540516 0.0059840059 -0.0547050152
## [21] 0.0222113980 -0.0094660980 -0.0117671902 0.0285587519 0.0581440852
## [26] -0.0646551446
##
## $freq
## [1] 0.002049180 0.004098361 0.006147541 0.008196721 0.010245902 0.012295082
## [7] 0.014344262 0.016393443 0.018442623 0.020491803 0.022540984 0.024590164
## [13] 0.026639344 0.028688525 0.030737705 0.032786885 0.034836066 0.036885246
## [19] 0.038934426 0.040983607 0.043032787 0.045081967 0.047131148 0.049180328
## [25] 0.051229508 0.053278689 0.055327869 0.057377049 0.059426230 0.061475410
## [31] 0.063524590 0.065573770 0.067622951 0.069672131 0.071721311 0.073770492
## [37] 0.075819672 0.077868852 0.079918033 0.081967213 0.084016393 0.086065574
## [43] 0.088114754 0.090163934 0.092213115 0.094262295 0.096311475 0.098360656
## [49] 0.100409836 0.102459016 0.104508197 0.106557377 0.108606557 0.110655738
## [55] 0.112704918 0.114754098 0.116803279 0.118852459 0.120901639 0.122950820
## [61] 0.125000000 0.127049180 0.129098361 0.131147541 0.133196721 0.135245902
## [67] 0.137295082 0.139344262 0.141393443 0.143442623 0.145491803 0.147540984
## [73] 0.149590164 0.151639344 0.153688525 0.155737705 0.157786885 0.159836066
## [79] 0.161885246 0.163934426 0.165983607 0.168032787 0.170081967 0.172131148
## [85] 0.174180328 0.176229508 0.178278689 0.180327869 0.182377049 0.184426230
## [91] 0.186475410 0.188524590 0.190573770 0.192622951 0.194672131 0.196721311
## [97] 0.198770492 0.200819672 0.202868852 0.204918033 0.206967213 0.209016393
## [103] 0.211065574 0.213114754 0.215163934 0.217213115 0.219262295 0.221311475
## [109] 0.223360656 0.225409836 0.227459016 0.229508197 0.231557377 0.233606557
## [115] 0.235655738 0.237704918 0.239754098 0.241803279 0.243852459 0.245901639
## [121] 0.247950820 0.250000000 0.252049180 0.254098361 0.256147541 0.258196721
## [127] 0.260245902 0.262295082 0.264344262 0.266393443 0.268442623 0.270491803
## [133] 0.272540984 0.274590164 0.276639344 0.278688525 0.280737705 0.282786885
## [139] 0.284836066 0.286885246 0.288934426 0.290983607 0.293032787 0.295081967
## [145] 0.297131148 0.299180328 0.301229508 0.303278689 0.305327869 0.307377049
## [151] 0.309426230 0.311475410 0.313524590 0.315573770 0.317622951 0.319672131
## [157] 0.321721311 0.323770492 0.325819672 0.327868852 0.329918033 0.331967213
## [163] 0.334016393 0.336065574 0.338114754 0.340163934 0.342213115 0.344262295
## [169] 0.346311475 0.348360656 0.350409836 0.352459016 0.354508197 0.356557377
## [175] 0.358606557 0.360655738 0.362704918 0.364754098 0.366803279 0.368852459
## [181] 0.370901639 0.372950820 0.375000000 0.377049180 0.379098361 0.381147541
## [187] 0.383196721 0.385245902 0.387295082 0.389344262 0.391393443 0.393442623
## [193] 0.395491803 0.397540984 0.399590164 0.401639344 0.403688525 0.405737705
## [199] 0.407786885 0.409836066 0.411885246 0.413934426 0.415983607 0.418032787
## [205] 0.420081967 0.422131148 0.424180328 0.426229508 0.428278689 0.430327869
## [211] 0.432377049 0.434426230 0.436475410 0.438524590 0.440573770 0.442622951
## [217] 0.444672131 0.446721311 0.448770492 0.450819672 0.452868852 0.454918033
## [223] 0.456967213 0.459016393 0.461065574 0.463114754 0.465163934 0.467213115
## [229] 0.469262295 0.471311475 0.473360656 0.475409836 0.477459016 0.479508197
## [235] 0.481557377 0.483606557 0.485655738 0.487704918 0.489754098 0.491803279
## [241] 0.493852459 0.495901639 0.497950820 0.500000000
##
## $db
## [1] -29.503708565 -17.510063800 -11.423777559 -9.318008875 -6.634459734
## [6] -19.985388500 -10.060122383 -4.576323643 -2.271800847 -1.091407488
## [11] -8.488840132 -3.787449287 -2.613394223 -4.322773754 -7.214680179
## [16] -12.003963257 -12.574122168 -2.018318791 -2.693440526 -2.162859385
## [21] -4.615299383 -14.604584599 -0.042094813 4.169608774 6.503395748
## [26] 4.423456552 -2.930914955 -3.896121091 -0.938215300 -0.005345015
## [31] 2.892201606 1.882682033 2.355889319 7.308433054 6.284470251
## [36] 4.807520340 5.463067524 -2.986191199 1.235174743 2.961987599
## [41] -3.445793403 5.472598129 2.214831589 1.361495193 8.165809990
## [46] 6.365967220 -1.864657159 2.072911955 7.375936051 -7.377751590
## [51] -1.464196999 -0.382124756 6.277954323 5.334192293 2.305652830
## [56] -0.683978302 1.493746063 0.152204080 -14.457379137 0.811395446
## [61] -7.444540572 5.081573199 -1.079877987 -3.013414701 -7.174427340
## [66] 4.470342177 6.491143917 1.613080665 2.046601357 6.904089696
## [71] 5.874750740 4.217637333 -6.704414834 -1.255931241 -14.073638892
## [76] -3.995697160 -5.659918449 -4.862247438 0.731935114 1.823486394
## [81] 2.402147462 2.926131106 0.063520149 0.486945016 -7.034973010
## [86] -9.000088071 -6.379874696 -4.776237102 -3.624572646 -6.127553100
## [91] -6.116041006 -18.822537795 -11.641554861 -10.245243499 -43.915766976
## [96] -14.467062832 -14.305161924 -19.870155414 -18.011455481 -18.337856520
## [101] -15.680913077 -15.109672953 -20.883826716 -13.412823223 -12.491183482
## [106] -8.243733929 -3.440121695 -3.933649703 -10.141213263 -14.793360532
## [111] -8.946843150 -0.250271237 -7.665159600 -5.632991442 -2.220694487
## [116] -3.779693650 -6.084993242 -20.452407713 -10.139929192 -3.484792792
## [121] -1.748292049 6.037072764 5.770524910 6.000215422 -5.002329102
## [126] 1.119721514 0.628279528 -0.140488290 5.184755286 -1.746681839
## [131] 0.495208469 0.388136945 5.819689440 2.752313815 -2.656309332
## [136] 0.523698022 -8.611252363 6.374174094 3.505604762 0.331540411
## [141] 0.241767399 2.420422657 1.205724777 6.080959366 1.476610171
## [146] 1.793222863 6.958597245 0.556595312 -16.746211087 3.015974359
## [151] -14.925962574 -3.502614367 -0.383945452 2.256838476 0.936181486
## [156] -3.909527702 0.313702655 4.716062018 2.008595721 -7.979589551
## [161] -1.148577100 3.701702383 -3.919066354 -3.862667803 -0.963714684
## [166] 3.015568562 4.950543613 2.558001839 -1.277388405 3.204517777
## [171] -0.193989890 -13.455397488 0.602665285 -2.470105218 -11.771362352
## [176] -9.309135681 -16.426444128 1.500236934 1.137133221 -1.751058700
## [181] -6.642881330 -3.520411488 -2.254265124 -3.197796793 -7.919829133
## [186] -11.920144216 -9.092155421 -13.326565160 -10.417062372 -19.504415328
## [191] -11.254360976 -22.262794368 -26.507977269 -22.024832146 -27.058280459
## [196] -25.804312246 -17.225921980 -12.732967695 -16.349821988 -14.439051427
## [201] -13.426814951 -15.177463138 -17.363316791 -11.668781897 -7.320591844
## [206] -8.049754610 -14.525553803 -3.724408799 -3.715942218 -10.071304408
## [211] -1.486257299 0.370886110 -3.493543424 -9.810544516 -13.522975869
## [216] -16.879320341 -6.422543684 -2.034621175 0.628198924 5.297460927
## [221] 0.818936683 -8.945739131 -0.699719878 -1.735456674 1.227682908
## [226] -4.265064741 -6.588943017 -4.346378066 -18.767684474 -7.947925835
## [231] -2.564008649 -1.235195860 -2.023861881 -3.843876218 1.399539931
## [236] 0.076728349 -3.557176300 -1.369652677 -17.604594004 -4.835136946
## [241] 0.379643570 -0.909107441 -3.995238484 -5.357067484
##
## $dbz
## [1] -8.607315412 -8.401025234 -8.086188718 -7.695257270 -7.258588458
## [6] -6.799198851 -6.331370563 -5.861759301 -5.391617930 -4.919243930
## [11] -4.442161826 -3.958764524 -3.469264958 -2.975941024 -2.482799333
## [16] -1.994887961 -1.517512249 -1.055553697 -0.613000344 -0.192711905
## [21] 0.203611975 0.575315120 0.922576512 1.246158855 1.547152233
## [26] 1.826741635 2.086018814 2.325851099 2.546811861 2.749169659
## [31] 2.932926454 3.097890773 3.243769962 3.370266617 3.477167305
## [36] 3.564415720 3.632166506 3.680819357 3.711035233 3.723737637
## [41] 3.720101945 3.701535169 3.669647610 3.626217037 3.573145622
## [46] 3.512410178 3.446007136 3.375895115 3.303939166 3.231861325
## [51] 3.161201294 3.093288649 3.029224407 2.969865913 2.915806406
## [56] 2.867340720 2.824412115 2.786542160 2.752754123 2.721508076
## [61] 2.690669893 2.657534759 2.618918556 2.571319622 2.511142126
## [66] 2.434963673 2.339825294 2.223520941 2.084863889 1.923906156
## [71] 1.742082361 1.542241539 1.328522998 1.106032846 0.880296683
## [76] 0.656509115 0.438669101 0.228759508 0.026161969 -0.172537984
## [81] -0.373303755 -0.584033405 -0.813743046 -1.071730330 -1.366852963
## [86] -1.706980202 -2.098603340 -2.546540924 -3.053643941 -3.620384614
## [91] -4.244189744 -4.918356193 -5.630389124 -6.359717274 -7.075145023
## [96] -7.733339483 -8.281039585 -8.664150871 -8.843582180 -8.810292013
## [101] -8.588496635 -8.224274010 -7.768515674 -7.264427992 -6.742645867
## [106] -6.221614491 -5.710138834 -5.210278835 -4.719932986 -4.235008038
## [111] -3.751205803 -3.265414626 -2.776633097 -2.286356788 -1.798435908
## [116] -1.318518846 -0.853271824 -0.409572303 0.006178661 0.388556442
## [121] 0.733422596 1.038080297 1.301323068 1.523417868 1.706048216
## [126] 1.852225354 1.966159410 2.053070738 2.118916959 2.170017078
## [131] 2.212573091 2.252120918 2.292978843 2.337788469 2.387244477
## [136] 2.440077295 2.493295458 2.542634450 2.583120000 2.609648200
## [141] 2.617507226 2.602800769 2.562765830 2.495998143 2.402605083
## [146] 2.284300492 2.144442272 1.987995524 1.821386197 1.652199230
## [151] 1.488680317 1.339031341 1.210549043 1.108731514 1.036534680
## [156] 0.993961539 0.978092227 0.983538209 1.003187351 1.029051195
## [161] 1.053044383 1.067592274 1.066036257 1.042859262 0.993778515
## [166] 0.915754435 0.806953666 0.666688809 0.495342257 0.294268936
## [171] 0.065663784 -0.187624382 -0.462348938 -0.755197849 -1.063190799
## [176] -1.384106153 -1.716881047 -2.061917050 -2.421231697 -2.798423542
## [181] -3.198456475 -3.627303209 -4.091504440 -4.597693099 -5.152104007
## [186] -5.760042771 -6.425225631 -7.148819024 -7.927896305 -8.752900053
## [191] -9.603649712 -10.443818770 -11.215457540 -11.838955945 -12.227678774
## [196] -12.320580193 -12.114117890 -11.663411759 -11.051420727 -10.355606039
## [201] -9.632200177 -8.915710095 -8.224387294 -7.566003626 -6.942199775
## [206] -6.351416570 -5.790823385 -5.257593818 -4.749733846 -4.266560752
## [211] -3.808881820 -3.378916794 -2.980024186 -2.616306449 -2.292169501
## [216] -2.011896478 -1.779270367 -1.597254556 -1.467720939 -1.391205240
## [221] -1.366668988 -1.391257125 -1.460059992 -1.565919622 -1.699361723
## [226] -1.848777977 -2.001005061 -2.142407649 -2.260434784 -2.345390627
## [231] -2.391941897 -2.399841370 -2.373579155 -2.321082995 -2.251927378
## [236] -2.175593152 -2.100162611 -2.031594973 -1.973541600 -1.927572725
## [241] -1.893671611 -1.870864984 -1.857872868 -1.853671012
## overfit table with p=7
gp.1.est6=est.ar.wge(gp.1,p=7,type='burg')
##
## Coefficients of Original polynomial:
## 0.2306 -0.1172 0.0395 -0.0494 0.1561 -0.0018 0.0335
##
## Factor Roots Abs Recip System Freq
## 1-0.7398B 1.3518 0.7398 0.0000
## 1-0.6331B+0.4774B^2 0.6630+-1.2865i 0.6910 0.1743
## 1+1.0777B+0.4672B^2 -1.1533+-0.9001i 0.6836 0.3945
## 1+0.0645B+0.2027B^2 -0.1591+-2.2153i 0.4503 0.2614
##
##
factor.wge(phi=c(rep(0,4),1))
##
## Coefficients of Original polynomial:
## 0.0000 0.0000 0.0000 0.0000 1.0000
##
## Factor Roots Abs Recip System Freq
## 1+1.6180B+1.0000B^2 -0.8090+-0.5878i 1.0000 0.4000
## 1-0.6180B+1.0000B^2 0.3090+-0.9511i 1.0000 0.2000
## 1-1.0000B 1.0000 1.0000 0.0000
##
##
None of the seasonal factors seems to improve our model. We will try a simple ARMA model instead.
aic5.wge(gp$US.dollar,p=0:15,q=0:2)
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 1 2
## Error in aic calculation at 2 1
## Error in aic calculation at 2 2
## Error in aic calculation at 3 0
## Error in aic calculation at 3 1
## Error in aic calculation at 3 2
## Error in aic calculation at 4 0
## Error in aic calculation at 4 2
## Error in aic calculation at 5 0
## Error in aic calculation at 5 1
## Error in aic calculation at 5 2
## Error in aic calculation at 9 2
## Error in aic calculation at 11 1
## Error in aic calculation at 11 2
## Error in aic calculation at 13 1
## Error in aic calculation at 13 2
## Error in aic calculation at 14 0
## Error in aic calculation at 14 2
## Five Smallest Values of aic
## p q aic
## 19 6 0 6.870692
## 20 6 1 6.874527
## 22 7 0 6.874628
## 37 12 0 6.874798
## 21 6 2 6.874926
aic5.wge(gp$US.dollar,type='bic')
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 1 2
## Error in aic calculation at 2 1
## Error in aic calculation at 2 2
## Error in aic calculation at 3 0
## Error in aic calculation at 3 1
## Error in aic calculation at 3 2
## Error in aic calculation at 4 0
## Error in aic calculation at 4 2
## Error in aic calculation at 5 0
## Error in aic calculation at 5 1
## Error in aic calculation at 5 2
## Five Smallest Values of bic
## p q bic
## 5 1 1 6.907766
## 7 2 0 6.916011
## 14 4 1 6.930958
## 4 1 0 6.949960
## 3 0 2 9.870681
AIC returns AR(6) as the most favorable model. We choose to model it that way.
gp.ar6.est=est.ar.wge(gp$US.dollar,p=6,type='burg')
##
## Coefficients of Original polynomial:
## 1.2345 -0.3455 0.1511 -0.0877 0.2039 -0.1598
##
## Factor Roots Abs Recip System Freq
## 1-0.9950B 1.0050 0.9950 0.0000
## 1-0.5262B+0.5043B^2 0.5218+-1.3080i 0.7101 0.1896
## 1-0.7045B 1.4195 0.7045 0.0000
## 1+0.9912B+0.4521B^2 -1.0961+-1.0051i 0.6724 0.3819
##
##
# Estimated AR(6) coefficients
gp.ar6.est$phi
## [1] 1.23448559 -0.34552524 0.15109462 -0.08765737 0.20389682 -0.15982653
# White-noise (residual) variance of the fit
gp.ar6.est$avar
## [1] 937.877
# Sample mean of the USD series
mean(gp$US.dollar)
## [1] 654.6814
# Hold out the last 6 observations (lastn=T) and forecast them with the AR(6)
gp.ar6.fore=fore.arma.wge(gp$US.dollar,phi=gp.ar6.est$phi, lastn=T, n.ahead=6, limits=T)
# Actual values
gp$US.dollar[(length(gp$US.dollar)-5):length(gp$US.dollar)]
#Forecasted Values
gp.ar6.fore$f
# ASE (average squared error) over the 6 held-out points
gp.ar6.ase=mean((gp$US.dollar[(length(gp$US.dollar)-5):length(gp$US.dollar)]-gp.ar6.fore$f)^2)
gp.ar6.ase
# Rolling-window validation of the AR(6); theta=0, s=0, d=0 => pure AR model
nahead=6
modelparams2=list(phi=gp.ar6.est$phi,theta=0,s=0,d=0)
wase2_dist=distrollingwindow(gp$US.dollar[1:492],modelparams2,nahead,"AR model")
## Warning: Ignoring unknown aesthetics: label
## Warning: Ignoring unknown aesthetics: label
## Warning: Removed 12 rows containing missing values (geom_path).
## Model appropriateness
# Ljung-Box test on the AR(6) residuals at K=24 (default) and K=48;
# p-values above 0.05 fail to reject white noise
ljung.wge(gp.ar6.est$res, p=6)
ljung.wge(gp.ar6.est$res, p=6,K=48)
This was analysis on the International Gold Price. My key research is on the local Indian market for Gold.
Why were the realizations in USD and INR different for the same time slot ? As next steps I will try to analyze the additional factors like currency exchange rate and local dynamics that may be involved that cause the price difference between the international Gold Price and the Indian Gold Price.
For our analysis, additional data is obtained from the site below -
Exchange Rate
Our main source gold.org also had data on gold Prices in Indian currency i.e. INR , Central Bank Reserve Holdings for India and Premium Discount Rate offered in Indian local market over the international Gold Price. Next we will look at these data and see if we can use them for our analysis.
## gold price data
# Parse the month-end date and derive year / month-name columns that are
# used later as merge keys against the other monthly tables.
gp$Month_end_dt=as.Date(gp$Month_end_dt,format="%m/%d/%y")
gp$prc_dt_Year=format(gp$Month_end_dt,format="%Y")
gp$prc_dt_Month=months(gp$Month_end_dt)
str(gp)
head(gp)
tail(gp)
## exchange rate data
## source is daily data, convert it to monthly average
usd_inr=read.csv('usd_inr_exc_rt.csv',header=TRUE)
usd_inr$rate_dt=as.Date(usd_inr$observation_date)
usd_inr$usdinr_Year = format(usd_inr$rate_dt, format="%Y")
usd_inr$usdinr_Month = months(usd_inr$rate_dt)
# Average the daily rate within each (month, year) pair
usd_inr_monthly=aggregate(DEXINUS_20200330 ~ usdinr_Month + usdinr_Year , usd_inr , mean)
str(usd_inr_monthly)
head(usd_inr_monthly)
tail(usd_inr_monthly)
# Realization / ACF / spectral density of the monthly exchange rate
plotts.sample.wge(usd_inr_monthly$DEXINUS_20200330)
## central bank reserve holdings
cbr=read.csv('cb_res_hold_monthly.csv',header=T)
# Row 69 of the source file holds India's series; transpose it so months
# become rows. NOTE(review): this is a position-based lookup — confirm row
# 69 is still India if the source file changes.
cbr_india=cbr[69,]
cbr_df=as.data.frame(t(cbr_india))
cbr_df$cbr_dt=factor(row.names(cbr_df))
## remove top 3 records with header records
cbr_df=tail(cbr_df, -3)
row.names(cbr_df)=NULL
# The transposed column is literally named `69`; factor -> character -> numeric
cbr_df$`69`=as.numeric(as.character(cbr_df$`69`))
# NOTE(review): assumes cbr_dt is formatted like "Mon.Year" — str_replace
# swaps the first "." for a space, then the split yields month and year
cbr_df$cbrdt=str_replace(cbr_df$cbr_dt,"\\."," ")
temp=strsplit(cbr_df$cbrdt," ")
temp=do.call(rbind,temp)
cbr_df$cbr_Mon=temp[,1]
cbr_df$cbr_Year=temp[,2]
# Map a three-letter month abbreviation (e.g. "Jan") to the full English
# month name (e.g. "January"); returns NA for unrecognized input.
# Uses the built-in month.name / month.abb constants instead of a
# hand-typed abbreviation vector — less error-prone, identical behavior.
getmonth=function(month_abb){
calmonth=month.name
names(calmonth)=month.abb
unname(calmonth[month_abb])
}
# Map a three-letter month abbreviation to its month number (1-12);
# returns NA for unrecognized input. match() against the built-in
# month.abb constant replaces the hand-typed named-vector lookup and
# returns the same integer results.
getmonthnum=function(month_abb){
match(month_abb,month.abb)
}
# Expand abbreviated month names to full names and month numbers.
# (sapply keeps the input names on the result; harmless when assigned
# to a data-frame column.)
cbr_df$cbr_Month=sapply(cbr_df$cbr_Mon,getmonth)
cbr_df$cbr_Monthnum=sapply(cbr_df$cbr_Mon,getmonthnum)
str(cbr_df)
head(cbr_df)
tail(cbr_df)
##remove null records
cbr_df=head(cbr_df,-1)
# Realization / ACF / spectral density of the reserve-holdings series
plotts.sample.wge(cbr_df$`69`)
## indian premium discounts on gold rate
# Skip the 5 header lines; V1 is parsed as a date below, V2 is the rate
inr_gold_premdc=read.csv('Indian_premium_discounts.csv',header=F, skip=5)
inr_gold_prem_dc=data.frame(inr_gold_premdc$V2)
names(inr_gold_prem_dc)=c("prem_dc_rt")
inr_gold_prem_dc$prem_dc_dt=as.Date(inr_gold_premdc$V1)
inr_gold_prem_dc$dc_Year=format(inr_gold_prem_dc$prem_dc_dt, format="%Y")
inr_gold_prem_dc$dc_Month = months(inr_gold_prem_dc$prem_dc_dt)
# Monthly average premium/discount rate
inr_gold_prem_Dc_monthly=aggregate(prem_dc_rt ~ dc_Year + dc_Month, inr_gold_prem_dc , mean)
str(inr_gold_prem_Dc_monthly)
head(inr_gold_prem_Dc_monthly)
tail(inr_gold_prem_Dc_monthly)
plotts.sample.wge(inr_gold_prem_Dc_monthly$prem_dc_rt)
## merge data
## goldprice + exchange_rt + central bank reserve holdings + premium discounts
# Inner-join on the (year, month-name) keys built earlier for each table.
inr_gp_excrt=merge(gp,usd_inr_monthly,by.x=c("prc_dt_Year","prc_dt_Month"),by.y=c("usdinr_Year","usdinr_Month"))
inr_gp_excrt=inr_gp_excrt[order(inr_gp_excrt$Month_end_dt),]
# Drop the first (earliest) merged row.
# NOTE(review): presumably an incomplete leading month — confirm intent.
inr_gp_excrt=tail(inr_gp_excrt,-1)
inr_gp_excrt_cbr=merge(inr_gp_excrt,cbr_df,by.x=c("prc_dt_Year","prc_dt_Month"),by.y=c("cbr_Year","cbr_Month"))
inr_gp_excrt_cbr_premdc=merge(inr_gp_excrt_cbr,inr_gold_prem_Dc_monthly,by.x=c("prc_dt_Year","prc_dt_Month"),by.y=c("dc_Year","dc_Month"))
# Plot the monthly USD/INR exchange rate: white panel, centered title,
# month-year axis labels. Built as a single chained ggplot expression.
p <- ggplot(inr_gp_excrt_cbr_premdc, aes(Month_end_dt, DEXINUS_20200330)) +
  geom_line() +
  theme(panel.background = element_rect(fill = "white", colour = "lightgrey"),
        plot.title = element_text(size = 18, hjust = 0.5)) +
  scale_x_date(date_labels = "%b-%Y") +
  xlab("Month-Year") +
  ylab("Exchange Rate") +
  ggtitle("Exchange Rate for last few years")
print(p)
The exchange rate is showing a similar upward trend as the price with each passing year. This variable could be of relative importance in determining the local market price of gold.
# Plot India's central-bank gold reserve holdings (column literally named
# `69` after the transpose) with the same theme as the other panels.
p <- ggplot(inr_gp_excrt_cbr_premdc, aes(Month_end_dt, `69`)) +
  geom_line() +
  theme(panel.background = element_rect(fill = "white", colour = "lightgrey"),
        plot.title = element_text(size = 18, hjust = 0.5)) +
  scale_x_date(date_labels = "%b-%Y") +
  xlab("Month-Year") +
  ylab("Central Bank Reserve Holdings") +
  ggtitle("Central Bank Reserve Holdings for India")
print(p)
Central bank Reserve Holdings have been mostly reported as 0 except from the year 2017. We see multiple values in the year 2018 and 2019. This data would be considered as discrete and non-timeseries. We will not use this data for our analysis as the value is 0 for most part of training data.
# Plot the local-market premium/discount rate over the international price,
# built as one chained ggplot expression.
p <- ggplot(inr_gp_excrt_cbr_premdc, aes(Month_end_dt, prem_dc_rt)) +
  geom_line() +
  theme(panel.background = element_rect(fill = "white", colour = "lightgrey"),
        plot.title = element_text(size = 18, hjust = 0.5)) +
  scale_x_date(date_labels = "%b-%Y") +
  xlab("Month-Year") +
  ylab("Premium Discount Rate") +
  ggtitle("Premium Discount Rate for Local Indian market")
print(p)
India has a festival of lights that comes in the month of Oct or November based on lunar calendar and has a day dedicated when people deem it auspicious to buy gold/silver. There is a religious belief that goddess of wealth will bring luck and prosperity to their life. Dec to Feb are key months for weddings and bride’s family tend to go out of their way to gift gold for her wedding. Looks like due to high demand, premium discount offered during these months is mostly negative i.e. local price is higher than international valued price.The trend was reverse for the year 2013.
Since premium discount data is available only from 2012, our final analysis is only on the data from 2012
# Realization / ACF / spectral density of the INR gold price
plotts.sample.wge(inr_gp_excrt_cbr_premdc$Indian.rupee)
## $autplt
## [1] 1.000000000 0.715392092 0.578956091 0.509918214 0.569995879
## [6] 0.534085465 0.471715850 0.451826488 0.483112610 0.436513582
## [11] 0.352009548 0.234318807 0.209124624 0.216900279 0.188516648
## [16] 0.142820354 0.101951463 0.089602316 0.068853543 0.056449446
## [21] 0.036723694 0.043204997 -0.002050761 -0.010024039 -0.039931508
## [26] -0.056526184
##
## $freq
## [1] 0.01041667 0.02083333 0.03125000 0.04166667 0.05208333 0.06250000
## [7] 0.07291667 0.08333333 0.09375000 0.10416667 0.11458333 0.12500000
## [13] 0.13541667 0.14583333 0.15625000 0.16666667 0.17708333 0.18750000
## [19] 0.19791667 0.20833333 0.21875000 0.22916667 0.23958333 0.25000000
## [25] 0.26041667 0.27083333 0.28125000 0.29166667 0.30208333 0.31250000
## [31] 0.32291667 0.33333333 0.34375000 0.35416667 0.36458333 0.37500000
## [37] 0.38541667 0.39583333 0.40625000 0.41666667 0.42708333 0.43750000
## [43] 0.44791667 0.45833333 0.46875000 0.47916667 0.48958333 0.50000000
##
## $db
## [1] 14.32326837 4.20968516 4.26567296 -17.41886036 -1.64596634
## [6] -5.05445210 -13.73928318 -2.17191373 0.07831357 -1.05384147
## [11] -3.11587155 -3.11142481 -5.19936751 -5.57948330 0.46561681
## [16] -8.43398609 -6.41743232 -3.96318777 -3.28113412 -13.37364551
## [21] -1.66646832 -0.48636268 1.54616338 -5.37944055 -5.65684452
## [26] -2.89935701 -15.99873503 -7.20205141 -3.52920216 -8.32225769
## [31] -17.79039649 -11.73253295 -4.73085707 -16.59069352 -15.15970563
## [36] -4.85640241 -13.13346857 -6.31971170 -7.21857708 -6.62279288
## [41] -3.26607597 -17.03473582 -7.34125099 -14.86506657 -4.43306675
## [46] -7.15698666 -7.93656537 -0.80642957
##
## $dbz
## [1] 8.810205 8.209474 7.208116 5.814681 4.067762 2.088269 0.169831
## [8] -1.216747 -1.785421 -1.777111 -1.618703 -1.566175 -1.704314 -2.019112
## [15] -2.424726 -2.773975 -2.911926 -2.783345 -2.484196 -2.184301 -2.026822
## [22] -2.092785 -2.409003 -2.961790 -3.704365 -4.561782 -5.443188 -6.267201
## [29] -6.987792 -7.593897 -8.078038 -8.407381 -8.534326 -8.444346 -8.189720
## [36] -7.868145 -7.572618 -7.357639 -7.230364 -7.155799 -7.071916 -6.917112
## [43] -6.663241 -6.332236 -5.982612 -5.680591 -5.478634 -5.407946
par(mfrow = c(2,1))
# Compare the ACF of the first half of the series against the second half
# to judge whether the autocorrelation structure is stable over time.
t=length(inr_gp_excrt_cbr_premdc$Month_end_dt)
# BUG FIX: the original indexed with [1:t/2], which parses as (1:t)/2
# because ":" binds tighter than "/" in R. That produced fractional indices
# (silently truncated on subsetting, duplicating early observations)
# instead of the intended first half. Parenthesize via floor() so both
# halves are well-defined even when t is odd.
half=floor(t/2)
acf(inr_gp_excrt_cbr_premdc$Indian.rupee[1:half], plot=TRUE, col="red")
acf(inr_gp_excrt_cbr_premdc$Indian.rupee[half:t], plot=TRUE, col="blue")
Auto-correlations are fast dampening, and we see a peak at freq = 0 and around 0.22 and 0.38 in the spectral density. Let us analyze ARMA and seasonal models.
Stationary/Non-Stationary
Looking for the 3 conditions of stationarity:
Condition 1 : Mean is not dependent on time : From the realization it is very evident that the gold prices have soared high with time. So this condition is not met.
Condition 2 : Variance is not dependent on time : It is difficult to say from just 1 realization about variance. But if we look at local variances, we do not see changing variability at different times. So we will not comment on this condition for now.
Condition 3 : ACFs are not dependent on time but have a similar pattern between two time slices. When we look at the overall ACF of the entire realization, it looks like sinusoidal dampening, but when we compare the first half of the realization with the second half, the second half shows faster dampening of ACFs than the first half. This shows the presence of some form of cyclic behavior.
If there is a thing as splitting a realization into 2 separate timeseries , stationarity can be revised but based on the Gold Prices in INR for the current realization, this data is not stationary
Based on the spectral density analysis we can look for seasonality between 2 to 5. Since it is not very clear, we will overfit the table with p=7
# Overfit an AR(7) (Burg) to the INR series to surface candidate (1-B)
# and seasonal factors in the factor table
gp_inr=inr_gp_excrt_cbr_premdc$Indian.rupee
gp_inr.est.s7=est.ar.wge(gp_inr,p=7,type="burg")
##
## Coefficients of Original polynomial:
## 0.4995 0.0610 -0.0255 0.2979 0.0329 -0.0639 0.0992
##
## Factor Roots Abs Recip System Freq
## 1-0.9596B 1.0421 0.9596 0.0000
## 1-0.0514B+0.5764B^2 0.0446+-1.3165i 0.7592 0.2446
## 1+1.3003B+0.4911B^2 -1.3238+-0.5327i 0.7008 0.4391
## 1-0.7888B+0.3650B^2 1.0805+-1.2538i 0.6042 0.1368
##
##
# Factor table of a pure s=7 seasonal operator (1-B^7), for comparison
# against the overfit AR(7) factors above
factor.wge(phi=c(rep(0,6),1))
##
## Coefficients of Original polynomial:
## 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000
##
## Factor Roots Abs Recip System Freq
## 1-1.0000B 1.0000 1.0000 0.0000
## 1+0.4450B+1.0000B^2 -0.2225+-0.9749i 1.0000 0.2857
## 1-1.2470B+1.0000B^2 0.6235+-0.7818i 1.0000 0.1429
## 1+1.8019B+1.0000B^2 -0.9010+-0.4339i 1.0000 0.4286
##
##
# Apply a lag-3 seasonal difference (1-B^3) and inspect the residual
# ACF / spectrum (arlimits=T draws the white-noise limit lines)
gp_inr.s3=artrans.wge(gp_inr,phi=c(0,0,1))
plotts.sample.wge(gp_inr.s3,arlimits=T)
## $autplt
## [1] 1.0000000000 0.2368602402 -0.0684514026 -0.3869394342 0.0656157461
## [6] 0.0174793633 -0.0501229717 -0.0784600695 0.1381222187 0.1383367294
## [11] 0.0889399365 -0.0484272323 0.0214065201 0.0823432309 0.0747738925
## [16] 0.0089958958 -0.0495308185 -0.0204729547 -0.0346438562 0.0097394252
## [21] -0.0100010885 0.0470635698 -0.0020103612 -0.0096210905 -0.0003066013
## [26] 0.0317206232
##
## $freq
## [1] 0.01075269 0.02150538 0.03225806 0.04301075 0.05376344 0.06451613
## [7] 0.07526882 0.08602151 0.09677419 0.10752688 0.11827957 0.12903226
## [13] 0.13978495 0.15053763 0.16129032 0.17204301 0.18279570 0.19354839
## [19] 0.20430108 0.21505376 0.22580645 0.23655914 0.24731183 0.25806452
## [25] 0.26881720 0.27956989 0.29032258 0.30107527 0.31182796 0.32258065
## [31] 0.33333333 0.34408602 0.35483871 0.36559140 0.37634409 0.38709677
## [37] 0.39784946 0.40860215 0.41935484 0.43010753 0.44086022 0.45161290
## [43] 0.46236559 0.47311828 0.48387097 0.49462366
##
## $db
## [1] 0.3954797 1.5433097 -9.9270103 -0.4750520 -13.8180548 -0.7009099
## [7] -1.4780345 -0.8469271 -0.6823834 5.3711033 0.1347542 7.1155230
## [13] 1.7279596 -0.6070729 3.6105679 -9.3213574 1.2976869 0.1779244
## [19] 3.7940196 5.3377321 6.3895599 2.6810180 -9.5885155 1.0207880
## [25] -2.7086769 -13.7131356 -9.1737411 -5.4153178 -14.5088087 -21.6501989
## [31] -24.5984483 -18.7231118 -25.8565132 -34.6381154 -3.7338342 -9.5511327
## [37] -2.9490106 -14.0811774 -2.8261767 -1.0541491 -10.5670907 -0.1612869
## [43] -5.6733277 4.8768629 -5.4247898 1.4573440
##
## $dbz
## [1] -0.63397878 -0.86048460 -1.05110595 -0.99345126 -0.58188333
## [6] 0.09039356 0.83228977 1.49268057 1.99837687 2.33167033
## [11] 2.50335550 2.53834485 2.47369299 2.36305527 2.27594130
## [16] 2.27792477 2.39320945 2.58008826 2.74711351 2.79344675
## [21] 2.63858786 2.23102413 1.54508134 0.57541411 -0.66728068
## [26] -2.15957511 -3.86853126 -5.75104469 -7.72909049 -9.59072299
## [31] -10.81962386 -10.81065896 -9.71014407 -8.22597253 -6.81663794
## [36] -5.62533116 -4.64949048 -3.82967385 -3.08858369 -2.36095786
## [41] -1.61994097 -0.88557028 -0.20919037 0.35158081 0.75012207
## [46] 0.95645418
# Apply a lag-4 seasonal difference (1-B^4) and inspect the residuals
gp_inr.s4=artrans.wge(gp_inr,phi=c(0,0,0,1))
plotts.sample.wge(gp_inr.s4,arlimits=T)
## $autplt
## [1] 1.000000000 0.349477303 0.122409607 -0.093916573 -0.344683805
## [6] -0.027012572 0.015811712 0.134239930 0.147102796 0.066647692
## [11] 0.086881512 0.000497344 0.034561962 0.117596522 0.062782204
## [16] 0.020121377 -0.031201918 -0.083740792 -0.021358643 -0.005880635
## [21] 0.004694650 0.063510893 -0.052856417 0.016432203 0.026533811
## [26] 0.005630788
##
## $freq
## [1] 0.01086957 0.02173913 0.03260870 0.04347826 0.05434783 0.06521739
## [7] 0.07608696 0.08695652 0.09782609 0.10869565 0.11956522 0.13043478
## [13] 0.14130435 0.15217391 0.16304348 0.17391304 0.18478261 0.19565217
## [19] 0.20652174 0.21739130 0.22826087 0.23913043 0.25000000 0.26086957
## [25] 0.27173913 0.28260870 0.29347826 0.30434783 0.31521739 0.32608696
## [31] 0.33695652 0.34782609 0.35869565 0.36956522 0.38043478 0.39130435
## [37] 0.40217391 0.41304348 0.42391304 0.43478261 0.44565217 0.45652174
## [43] 0.46739130 0.47826087 0.48913043 0.50000000
##
## $db
## [1] 3.292489639 3.987293805 -3.670872237 1.244443597 -8.982394025
## [6] 2.651437679 1.919434771 -0.242598040 -2.919421776 6.104637273
## [11] -0.131208781 8.371999316 4.814431186 3.715985712 0.318992137
## [16] -11.491014697 1.396186053 -6.064929579 -1.468028280 -1.915910511
## [21] -0.963284602 -6.179371409 -13.643400303 -16.175309950 -7.430728413
## [26] -13.110848647 -10.709168167 -1.120761008 -1.573422997 -2.418613194
## [31] -6.793120782 0.006502117 -10.042274212 -0.522307177 1.428191843
## [36] 3.138369711 -0.127960611 -3.392210835 -0.463522667 -4.466391543
## [41] -5.447569215 -4.143806785 -14.166758657 -3.539691577 -7.286106231
## [46] -9.985380798
##
## $dbz
## [1] 1.7627399 1.5042411 1.2232395 1.1072906 1.2897137 1.7460673
## [7] 2.3304921 2.8945275 3.3475124 3.6489147 3.7819044 3.7343066
## [13] 3.4919489 3.0419043 2.3813283 1.5277977 0.5264769 -0.5536346
## [19] -1.6420269 -2.6983312 -3.7169780 -4.6763635 -5.4611252 -5.8588482
## [25] -5.7334651 -5.2002948 -4.5111182 -3.8448827 -3.2530141 -2.7052555
## [31] -2.1536053 -1.5840495 -1.0343407 -0.5743748 -0.2744612 -0.1854879
## [37] -0.3340077 -0.7246499 -1.3437046 -2.1614305 -3.1324214 -4.1932584
## [43] -5.2561033 -6.1993912 -6.8683397 -7.1131635
# Apply a lag-5 seasonal difference (1-B^5) and inspect the residuals
gp_inr.s5=artrans.wge(gp_inr,phi=c(0,0,0,0,1))
plotts.sample.wge(gp_inr.s5,arlimits=T)
## $autplt
## [1] 1.000000000 0.304161457 0.130288756 -0.038287262 0.074814453
## [6] -0.252450678 0.057453612 0.125919743 0.179809119 0.048621541
## [11] 0.066148198 0.003971363 0.030870683 0.085535345 0.075668176
## [16] -0.004321271 -0.052756176 -0.018198289 -0.045993214 -0.009219632
## [21] -0.002471843 0.013607926 -0.012067250 0.021901516 0.002919463
## [26] -0.026936568
##
## $freq
## [1] 0.01098901 0.02197802 0.03296703 0.04395604 0.05494505 0.06593407
## [7] 0.07692308 0.08791209 0.09890110 0.10989011 0.12087912 0.13186813
## [13] 0.14285714 0.15384615 0.16483516 0.17582418 0.18681319 0.19780220
## [19] 0.20879121 0.21978022 0.23076923 0.24175824 0.25274725 0.26373626
## [25] 0.27472527 0.28571429 0.29670330 0.30769231 0.31868132 0.32967033
## [31] 0.34065934 0.35164835 0.36263736 0.37362637 0.38461538 0.39560440
## [37] 0.40659341 0.41758242 0.42857143 0.43956044 0.45054945 0.46153846
## [43] 0.47252747 0.48351648 0.49450549
##
## $db
## [1] 4.581728e+00 5.106740e+00 -1.050689e+00 1.664690e+00 -8.430689e+00
## [6] 3.409745e+00 3.225830e+00 5.150617e-04 -9.483117e+00 4.646993e+00
## [11] -2.943557e+00 6.826714e+00 4.496115e+00 4.267909e+00 -3.557882e+01
## [16] -9.278416e+00 -1.163097e+01 -6.665513e+00 -5.181404e+00 -3.412793e+00
## [21] -9.161286e+00 1.009794e+00 -3.864575e-01 -9.204720e+00 3.938273e+00
## [26] 1.643477e+00 2.173473e+00 -9.718430e+00 -1.714708e+00 -2.075433e+00
## [31] -2.276376e+00 -9.152668e+00 -2.171203e+01 -5.383814e+00 -1.938542e+01
## [36] -1.445437e+01 -1.766437e+01 -1.655334e+01 -4.293316e+00 -2.761739e+01
## [41] -7.192505e-01 -6.832228e+00 4.774275e+00 -5.466268e+00 1.703528e+00
##
## $dbz
## [1] 2.80074954 2.51837924 2.14975107 1.83416998 1.70390419 1.80333383
## [7] 2.06436510 2.36788186 2.61197140 2.73191576 2.68789197 2.44877944
## [13] 1.98549384 1.27730560 0.33356078 -0.76323831 -1.79940254 -2.44365935
## [19] -2.47820737 -2.02773371 -1.38900713 -0.77362003 -0.27282013 0.08035904
## [25] 0.26502254 0.25727447 0.03283522 -0.42530660 -1.12055662 -2.03894164
## [31] -3.14721753 -4.39155600 -5.69249966 -6.92404246 -7.86563203 -8.18649119
## [37] -7.64482758 -6.38436613 -4.80608426 -3.23841758 -1.85284997 -0.72101233
## [43] 0.13200758 0.70007268 0.98349156
# Apply a lag-6 seasonal difference (1-B^6) and inspect the residuals
gp_inr.s6=artrans.wge(gp_inr,phi=c(0,0,0,0,0,1))
plotts.sample.wge(gp_inr.s6,arlimits=T)
## $autplt
## [1] 1.0000000000 0.3220562339 0.0003933926 0.0388475387 0.2172803174
## [6] 0.1689920175 -0.2034649608 0.0255002239 0.1641702800 0.1659573812
## [11] 0.0866635434 -0.0286350039 0.0211335253 0.0777119435 0.0523896948
## [16] -0.0134293418 -0.0311430106 -0.0044159821 -0.0178467889 -0.0213304662
## [21] -0.0502507186 0.0359314986 0.0043424438 0.0172523184 -0.0293702928
## [26] -0.0199396522
##
## $freq
## [1] 0.01111111 0.02222222 0.03333333 0.04444444 0.05555556 0.06666667
## [7] 0.07777778 0.08888889 0.10000000 0.11111111 0.12222222 0.13333333
## [13] 0.14444444 0.15555556 0.16666667 0.17777778 0.18888889 0.20000000
## [19] 0.21111111 0.22222222 0.23333333 0.24444444 0.25555556 0.26666667
## [25] 0.27777778 0.28888889 0.30000000 0.31111111 0.32222222 0.33333333
## [31] 0.34444444 0.35555556 0.36666667 0.37777778 0.38888889 0.40000000
## [37] 0.41111111 0.42222222 0.43333333 0.44444444 0.45555556 0.46666667
## [43] 0.47777778 0.48888889 0.50000000
##
## $db
## [1] 5.38565908 5.85676367 0.52506320 1.76575722 -9.06529053
## [6] 3.01125674 3.37185501 0.04731745 -24.03430609 2.01453615
## [11] -6.87446926 3.18543248 1.13849525 -0.02164246 -4.67787645
## [16] -4.62564911 1.73597211 2.83324194 3.59756210 4.19837341
## [21] -2.19676075 2.09724251 4.16071994 -6.03669438 -0.79940753
## [26] -1.19276813 0.71638812 -8.88013752 -13.75612312 -16.68663755
## [31] -10.41808854 -20.13710295 -13.07002948 0.06166128 -2.68954815
## [36] 1.00150446 -10.85107917 1.40970824 -2.96533284 -3.01881999
## [41] -4.72509127 -6.92797753 -3.64385886 -9.99412341 -15.69313461
##
## $dbz
## [1] 3.486678866 3.187255045 2.750950432 2.268622811 1.843932379
## [6] 1.548219904 1.379876538 1.269771051 1.126373373 0.877565426
## [11] 0.491590107 -0.008829851 -0.524447002 -0.865959507 -0.821132964
## [16] -0.328179255 0.448032953 1.260226014 1.924358635 2.344178196
## [21] 2.482334427 2.333318366 1.907739881 1.223607801 0.300607762
## [26] -0.842910896 -2.184167270 -3.664123660 -5.101065536 -6.081303416
## [31] -6.139802645 -5.337713218 -4.199886713 -3.139149369 -2.329630395
## [36] -1.819559400 -1.611031429 -1.691763423 -2.044327198 -2.645384658
## [41] -3.457777937 -4.413076399 -5.381657054 -6.145519234 -6.443158666
# Apply a lag-7 seasonal difference (1-B^7) and inspect the residuals
gp_inr.s7=artrans.wge(gp_inr,phi=c(0,0,0,0,0,0,1))
plotts.sample.wge(gp_inr.s7,arlimits=T)
## $autplt
## [1] 1.000000000 0.352891256 0.113413499 0.039018031 0.333115703
## [6] 0.276184591 0.080182395 -0.208535433 0.156366286 0.194724537
## [11] 0.170170911 0.001646648 0.007782294 0.067272568 0.033634570
## [16] 0.002016885 -0.015050222 -0.013793181 0.004890716 -0.040987995
## [21] -0.030202808 0.037025768 -0.003840837 -0.014941820 -0.006335842
## [26] -0.036593075
##
## $freq
## [1] 0.01123596 0.02247191 0.03370787 0.04494382 0.05617978 0.06741573
## [7] 0.07865169 0.08988764 0.10112360 0.11235955 0.12359551 0.13483146
## [13] 0.14606742 0.15730337 0.16853933 0.17977528 0.19101124 0.20224719
## [19] 0.21348315 0.22471910 0.23595506 0.24719101 0.25842697 0.26966292
## [25] 0.28089888 0.29213483 0.30337079 0.31460674 0.32584270 0.33707865
## [31] 0.34831461 0.35955056 0.37078652 0.38202247 0.39325843 0.40449438
## [37] 0.41573034 0.42696629 0.43820225 0.44943820 0.46067416 0.47191011
## [43] 0.48314607 0.49438202
##
## $db
## [1] 6.5516235 7.0921568 2.0255716 2.5526692 -9.1148981 2.4694030
## [7] 3.3598289 0.2937254 -14.7060121 -0.9541349 -8.7395657 -2.5087636
## [13] -6.3656252 -15.4428320 -8.6657397 -9.0968159 3.5859684 4.7396926
## [19] 6.0029418 6.1283731 1.7840907 -8.9549594 -0.9734027 -7.5344271
## [25] -15.5724089 -14.5249871 -8.6126645 -7.1591492 -5.7953841 -6.5037687
## [31] -0.5309155 -9.2950509 0.1007360 -4.7270831 -0.6931027 -7.1519147
## [37] -9.5978716 -19.0727649 -20.1902594 -4.6939343 -8.9834828 2.8405275
## [43] -6.4625891 1.1356465
##
## $dbz
## [1] 4.68590076 4.31784775 3.74828626 3.04591877 2.30161855 1.60115962
## [7] 0.98337683 0.41955315 -0.15722301 -0.78885782 -1.42317107 -1.86241432
## [13] -1.82260245 -1.19135087 -0.18152934 0.88172844 1.77190149 2.37616590
## [19] 2.64522708 2.55690097 2.09913468 1.26526032 0.05969103 -1.47789220
## [25] -3.20732776 -4.77201419 -5.62032308 -5.51787414 -4.85310529 -4.10434634
## [31] -3.51881347 -3.18933195 -3.15137500 -3.41740414 -3.96723187 -4.69741829
## [37] -5.33278778 -5.43353336 -4.76327207 -3.59970255 -2.38156450 -1.37321187
## [43] -0.67837778 -0.32772984
From the various seasonal models above, s=6 seems most favored, as the lags > 1 of the transformed data are mostly within the limits, resembling white noise. So we will go with an s=6 seasonal model.
# Search ARMA orders (p=0..14, q=0..2) by AIC on the s=6 differenced series
aic5.wge(gp_inr.s6,p=0:14,q=0:2)
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 36 11 2 17.32260
## 39 12 2 17.33317
## 42 13 2 17.35587
## 45 14 2 17.38292
## 40 13 0 17.40411
AIC favors a p=11,q=2 model for the seasonal transformed data.We will next try ARIMA(11,0,2) with s=6 model
# ML estimates of ARMA(11,2) on the s=6 differenced series (AIC's top pick)
gp_inr.est.ar11_2_s6=est.arma.wge(gp_inr.s6,p=11,q=2)
##
## Coefficients of Original polynomial:
## -0.4716 -0.4490 0.3279 0.2915 0.2846 -0.4223 -0.2563 -0.1581 0.3109 0.4520 0.3421
##
## Factor Roots Abs Recip System Freq
## 1-0.2378B+0.9051B^2 0.1314+-1.0429i 0.9514 0.2301
## 1-1.5899B+0.9035B^2 0.8798+-0.5768i 0.9506 0.0924
## 1-0.9362B 1.0681 0.9362 0.0000
## 1+0.4594B+0.8649B^2 -0.2656+-1.0420i 0.9300 0.2897
## 1+1.2788B+0.7527B^2 -0.8495+-0.7790i 0.8676 0.3819
## 1+1.4974B+0.6863B^2 -1.0910+-0.5167i 0.8284 0.4296
##
##
# Fitted ARMA(11,2) parameters, residual variance, and series mean
#phi
gp_inr.est.ar11_2_s6$phi
## [1] -0.4716497 -0.4490077 0.3279052 0.2914971 0.2845965 -0.4222890
## [7] -0.2562766 -0.1581471 0.3108551 0.4520370 0.3420648
#theta
gp_inr.est.ar11_2_s6$theta
## [1] -0.9291278 -0.9341934
# white noise variance
gp_inr.est.ar11_2_s6$avar
## [1] 24433951
#mean
mean(gp_inr)
## [1] 84175.84
Getting model estimates for ARMA(11,2) , next step is to forecast last 6 values of Gold Price in Indian Currency using this model.
# Hold out and forecast the last 6 INR prices with ARIMA(11,0,2) s=6
gp_inr.fore.ar11_2_s6=fore.aruma.wge(gp_inr,phi=gp_inr.est.ar11_2_s6$phi,theta=gp_inr.est.ar11_2_s6$theta,s=6,lastn=T,n.ahead=6,limits=T)
## Actual values
gp_inr[(length(gp_inr)-5):length(gp_inr)]
#Forecasted Values
gp_inr.fore.ar11_2_s6$f
# ASE over the 6 held-out points
gp_inr.ase.ar11_2_s6=mean((gp_inr[(length(gp_inr)-5):length(gp_inr)]-gp_inr.fore.ar11_2_s6$f)^2)
gp_inr.ase.ar11_2_s6
## Model appropriateness
# NOTE(review): p=6 is passed here although the fitted model is ARMA(11,2);
# ljung.wge's p/q should normally match the fitted orders — confirm intent
ljung.wge(gp_inr.est.ar11_2_s6$res, p=6)
ljung.wge(gp_inr.est.ar11_2_s6$res, p=6,K=48)
## evidence that residuals resemble white noise
ASE for the last 6 predictions is 63,526,675. Also, the Ljung-Box test on the residuals confirms they resemble white noise based on the p-val > 0.05 for K=24 and K=48. Next is to validate this model on more values by using the Rolling Window Strategy.
# Rolling-window validation: 6-step-ahead forecasts; window must hold at
# least p+q+1 = 14 observations for re-estimation
nahead=6
p=11
q=2
windowsize=p+q+1
modelparams2=list(phi=gp_inr.est.ar11_2_s6$phi,theta=gp_inr.est.ar11_2_s6$theta,s=6,d=0)
# NOTE(review): the 6:97 slice drops the first 5 observations — presumably
# to align the s=6 windows; confirm the offset is intentional
gp_inr.rw.ar11_2_s6=distrollingwindow_v2(gp_inr[6:97],modelparams2,nahead,"ARIMA(11,0,2) S=6 model",windowsize,"Univariate")
## [1] "inputsize"
## [1] 92
## [1] "start_index"
## [1] 1 7 13 19 25 31 37 43 49 55 61 67
## [1] "end_index"
## [1] 20 26 32 38 44 50 56 62 68 74 80 86
## [1] 92
## [1] 84138.77 71982.45 76520.70 86086.59 83121.32 72320.72
## [1] 77718.73 75289.62 79941.53 77221.02 78031.11 76032.45
## [1] 83171.21 73372.32 68819.90 77755.85 79113.19 71099.51
## [1] 71469.87 73778.71 73869.21 70721.19 71042.00 74788.01
## [1] 75612.62 77119.22 72843.44 78642.79 77724.72 78784.76
## [1] 82457.94 75104.06 87098.65 82720.68 83752.63 87861.24
## [1] 87595.15 87903.50 87393.38 79763.54 88036.21 89149.01
## [1] 87779.12 81362.51 79445.53 81165.76 84428.14 80439.99
## [1] 79652.61 80343.48 82879.72 81855.10 83423.42 83549.30
## [1] 79242.60 89705.07 88805.62 84042.57 87965.15 91381.69
## [1] 86762.77 89619.18 88550.88 87762.29 86972.48 89922.43
## [1] NA NA NA NA NA NA
## [1] "predHolder"
## [1] NA NA NA NA NA NA NA NA
## [9] NA NA NA NA NA NA NA NA
## [17] NA NA NA NA 84138.77 71982.45 76520.70 86086.59
## [25] 83121.32 72320.72 77718.73 75289.62 79941.53 77221.02 78031.11 76032.45
## [33] 83171.21 73372.32 68819.90 77755.85 79113.19 71099.51 71469.87 73778.71
## [41] 73869.21 70721.19 71042.00 74788.01 75612.62 77119.22 72843.44 78642.79
## [49] 77724.72 78784.76 82457.94 75104.06 87098.65 82720.68 83752.63 87861.24
## [57] 87595.15 87903.50 87393.38 79763.54 88036.21 89149.01 87779.12 81362.51
## [65] 79445.53 81165.76 84428.14 80439.99 79652.61 80343.48 82879.72 81855.10
## [73] 83423.42 83549.30 79242.60 89705.07 88805.62 84042.57 87965.15 91381.69
## [81] 86762.77 89619.18 88550.88 87762.29 86972.48 89922.43 NA NA
## [89] NA NA NA NA
## [1] 92
## [1] "output"
## [1] "======="
## timeseq pred actuals
## 87 87 NA 90427.5
## 88 88 NA 89662.2
## 89 89 NA 105070.6
## 90 90 NA 106207.2
## 91 91 NA 107869.2
## 92 92 NA NA
## [1] 28499742 13594667 39892281 18517904 83753834 27126870 38913333
## [8] 19586911 18685885 14653160 132851321 NA
## [1] "WindowedASE"
## [1] NA
## [1] "std deviation of Windowed ASE"
## [1] NA
#gp_summary=data.frame("model"="ARIMA(11,0,2) with s=6","ASE"=gp_inr.ase.ar11_2_s6,"RollingASE"=gp_inr.rw.ar11_2_s6$WASE)
# NOTE(review): RollingASE is hard-coded because the rolling-window run
# above returned NA for WindowedASE (the last window had NA actuals);
# the constant is presumably from an earlier successful run — confirm
gp_summary=data.frame("model"="ARIMA(11,0,2) with s=6","ASE"=gp_inr.ase.ar11_2_s6,"RollingASE"=41950665)
print(gp_summary)
## model ASE RollingASE
## 1 ARIMA(11,0,2) with s=6 63526675 41950665
Rolling Window ASE is 41,950,665. This indicates model performs even better on overall data. Also we can see from the plot of Rolling Window Test analysis predictions are following the trend of the actual data.
D factor ? As we study the factors of different seasonal models above there is only 1 (1-B) term. So what if we tried a ARIMA model without seasonality but just d=1 factor.
# First difference (1-B): the d=1 ARIMA-style transform
gp_inr.d1=artrans.wge(gp_inr,phi=1)
plotts.sample.wge(gp_inr.d1,arlimits=T)
## d minus data shows a peak at 0.21, so lets try seasonality of s=5
gp_inr.d1.s5=artrans.wge(gp_inr.d1,phi=c(0,0,0,0,1))
plotts.sample.wge(gp_inr.d1.s5)
## auto correlations of residuals do not resemble white noise based on plotts.sample.wge
## lets look at what aic favors
# Search ARMA orders on the differenced (d=1) series
aic5.wge(gp_inr.d1,p=0:14,q=0:2)
It is better not to totally rule out the seasonality factor, so after transforming the data for d=1, seasonality was applied, but the autocorrelations of the residuals do not seem to resemble white noise. We favor models whose residuals resemble white noise. So the d-transformed data was then fed into AIC to get estimates of other ARIMA model parameters. AIC of the d-transformed data favors an AR(3) model. So our final model is ARIMA(3,1,0).
# AR(3) fit on the first-differenced series => ARIMA(3,1,0) overall
gp_inr.est.ar3_1_0=est.ar.wge(gp_inr.d1,p=3)
##
## Coefficients of Original polynomial:
## -0.4693 -0.3862 -0.3510
##
## Factor Roots Abs Recip System Freq
## 1-0.2027B+0.5224B^2 0.1940+-1.3699i 0.7227 0.2276
## 1+0.6720B -1.4881 0.6720 0.5000
##
##
##
## Coefficients of Original polynomial:
## -0.4693 -0.3862 -0.3510
##
## Factor Roots Abs Recip System Freq
## 1-0.2027B+0.5224B^2 0.1940+-1.3699i 0.7227 0.2276
## 1+0.6720B -1.4881 0.6720 0.5000
##
##
# Fitted AR(3) parameters, residual variance, and series mean
#phi
gp_inr.est.ar3_1_0$phi
## [1] -0.4693479 -0.3861761 -0.3510271
# white noise variance
gp_inr.est.ar3_1_0$avar
## [1] 22400923
#mean
mean(gp_inr)
## [1] 84175.84
Getting model estimates for AR(3) , next step is to forecast last 6 values of Gold Price in Indian Currency using this model.
# Hold out and forecast the last 6 INR prices with ARIMA(3,1,0)
gp_inr.fore.ar3_1_0=fore.aruma.wge(gp_inr,phi=gp_inr.est.ar3_1_0$phi,s=0,d=1,lastn=T,n.ahead=6,limits=T)
## Actual values
gp_inr[(length(gp_inr)-5):length(gp_inr)]
#Forecasted Values
gp_inr.fore.ar3_1_0$f
# ASE over the 6 held-out points
gp_inr.ase.ar3_1_0=mean((gp_inr[(length(gp_inr)-5):length(gp_inr)]-gp_inr.fore.ar3_1_0$f)^2)
gp_inr.ase.ar3_1_0
## Model appropriateness
# NOTE(review): p=6 passed although the fitted AR order is 3 — confirm
ljung.wge(gp_inr.est.ar3_1_0$res, p=6)
ljung.wge(gp_inr.est.ar3_1_0$res, p=6,K=48)
## evidence that residuals resemble white noise
ASE for the last 6 predictions is 123,827,211, almost double of what was found in previous model . Also from ljung test on the residuals, it is confirmed they resemble white noise based on the p-val > 0.05 for K=24 and K=48. Next is to validate this model on more values by using Rolling Window Strategy.
# Rolling-window validation of ARIMA(3,1,0); window holds p+q+d+1 = 5 points
nahead=6
p=3
q=0
d=1
windowsize=p+q+d+1
modelparams2=list(phi=gp_inr.est.ar3_1_0$phi,theta=0,s=0,d=1)
# NOTE(review): the 3:97 slice drops the first 2 observations — confirm
gp_inr.rw.ar3_1_0=distrollingwindow_v2(gp_inr[3:97],modelparams2,nahead,"ARIMA(3,1,0) model",windowsize,"Univariate")
## [1] "inputsize"
## [1] 95
## [1] "start_index"
## [1] 1 7 13 19 25 31 37 43 49 55 61 67 73 79
## [1] "end_index"
## [1] 11 17 23 29 35 41 47 53 59 65 71 77 83 89
## [1] 95
## [1] 87175.25 88880.18 90627.07 86902.32 87377.44 87979.65
## [1] 81894.42 84498.29 81413.91 80595.22 81256.56 82345.02
## [1] 79650.52 80292.87 82222.84 80621.88 80402.50 80446.24
## [1] 78245.71 77786.38 78116.16 77502.63 77824.47 77794.59
## [1] 74284.00 74667.46 74912.41 74963.84 74710.50 74723.56
## [1] 75481.43 76167.36 74595.19 75054.14 75205.09 75508.88
## [1] 77896.56 77306.41 76549.12 78779.49 78232.27 77893.62
## [1] 84402.48 81047.97 84596.92 84753.03 84486.76 83305.67
## [1] 82991.05 83609.64 85218.55 83762.81 83607.59 83677.85
## [1] 81627.45 81383.94 80800.30 81026.30 81231.09 81252.57
## [1] 85719.62 84986.81 85062.46 86009.71 85793.14 85502.43
## [1] 86268.96 85743.92 85609.81 86092.85 86102.23 85958.36
## [1] 88522.72 88844.95 88026.77 88558.21 88511.63 88615.47
## [1] NA NA NA NA NA NA
## [1] "predHolder"
## [1] NA NA NA NA NA NA NA NA
## [9] NA NA NA 87175.25 88880.18 90627.07 86902.32 87377.44
## [17] 87979.65 81894.42 84498.29 81413.91 80595.22 81256.56 82345.02 79650.52
## [25] 80292.87 82222.84 80621.88 80402.50 80446.24 78245.71 77786.38 78116.16
## [33] 77502.63 77824.47 77794.59 74284.00 74667.46 74912.41 74963.84 74710.50
## [41] 74723.56 75481.43 76167.36 74595.19 75054.14 75205.09 75508.88 77896.56
## [49] 77306.41 76549.12 78779.49 78232.27 77893.62 84402.48 81047.97 84596.92
## [57] 84753.03 84486.76 83305.67 82991.05 83609.64 85218.55 83762.81 83607.59
## [65] 83677.85 81627.45 81383.94 80800.30 81026.30 81231.09 81252.57 85719.62
## [73] 84986.81 85062.46 86009.71 85793.14 85502.43 86268.96 85743.92 85609.81
## [81] 86092.85 86102.23 85958.36 88522.72 88844.95 88026.77 88558.21 88511.63
## [89] 88615.47 NA NA NA NA NA NA
## [1] 95
## [1] "output"
## [1] "======="
## timeseq pred actuals
## 90 90 NA 90427.5
## 91 91 NA 89662.2
## 92 92 NA 105070.6
## 93 93 NA 106207.2
## 94 94 NA 107869.2
## 95 95 NA NA
## [1] 67068354 18516269 9464191 10222614 5432451 10299657 65388077
## [8] 5279497 6867536 11068444 3463033 5384288 123330993 NA
## [1] "WindowedASE"
## [1] NA
## [1] "std deviation of Windowed ASE"
## [1] NA
# The rolling-window run above returned WindowedASE = NA, so the value
# from a prior full run (32548667) is hard-coded in place of
# gp_inr.rw.ar3_1_0$WASE (original computation left commented out).
#gp_summary=rbind(gp_summary,data.frame("model"="ARIMA(3,1,0)","ASE"=gp_inr.ase.ar3_1_0,"RollingASE"=gp_inr.rw.ar3_1_0$WASE))
gp_summary=rbind(gp_summary,data.frame("model"="ARIMA(3,1,0)","ASE"=gp_inr.ase.ar3_1_0,"RollingASE"=32548667))
print(gp_summary)
## model ASE RollingASE
## 1 ARIMA(11,0,2) with s=6 63526675 41950665
## 2 ARIMA(3,1,0) 62612580 32548667
Rolling Window ASE for the ARIMA(3,1,0) model was found to be 32,548,667, which is smaller than for our previous model. So although this model did not perform well on just the last 6 predictions, it seems to have performed better overall.
Out of curiosity: would a purely stationary model do better?
We get model estimates, starting with AIC order selection.
aic5.wge(gp_inr)
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 1 2
## Five Smallest Values of aic
## p q aic
## 13 4 0 17.02877
## 16 5 0 17.04861
## 14 4 1 17.04880
## 8 2 1 17.06123
## 5 1 1 17.06284
gp_inr.est.ar2_1=est.arma.wge(gp_inr.d1,p=2,q=1)
##
## Coefficients of Original polynomial:
## 0.1609 -0.1397
##
## Factor Roots Abs Recip System Freq
## 1-0.1609B+0.1397B^2 0.5759+-2.6131i 0.3737 0.2155
##
##
# Report the fitted ARMA(2,1) parameter estimates.
# phi: AR coefficient estimates
gp_inr.est.ar2_1$phi
## [1] 0.1608582 -0.1396656
# theta: MA coefficient estimate
gp_inr.est.ar2_1$theta
## [1] 0.6523612
# white noise variance
gp_inr.est.ar2_1$avar
## [1] 23216679
# mean of the (undifferenced) series
mean(gp_inr)
## [1] 84175.84
What does the forecast look like for the ARMA(2,1) model?
# Forecast the final 6 observations with the fitted ARMA(2,1) model
# (s=0, d=0 => stationary specification; lastn=TRUE forecasts the last
# n.ahead values so they can be compared against the actuals).
gp_inr.fore.ar2_1=fore.aruma.wge(gp_inr,phi=gp_inr.est.ar2_1$phi,theta=gp_inr.est.ar2_1$theta,s=0,d=0,lastn=TRUE,n.ahead=6,limits=TRUE)
#gp_inr.fore.ar2_1=fore.aruma.wge(gp_inr[87:96],phi=gp_inr.est.ar2_1$phi,theta=gp_inr.est.ar2_1$theta,s=0,d=0,lastn=T,n.ahead=6,limits=T)
## Actual values
gp_inr[(length(gp_inr)-5):length(gp_inr)]
#Forecasted Values
gp_inr.fore.ar2_1$f
# ASE over the last 6 predictions
gp_inr.ase.ar2_1=mean((gp_inr[(length(gp_inr)-5):length(gp_inr)]-gp_inr.fore.ar2_1$f)^2)
gp_inr.ase.ar2_1
## Model appropriateness
# BUG FIX: the Ljung-Box test must be run on the residuals of the model
# being evaluated -- the ARMA(2,1) fit -- not on the earlier ARIMA(3,1,0)
# residuals that were pasted in here. p/q are the fitted AR/MA orders
# (2 and 1); K is the number of lags tested (default 24, then 48).
ljung.wge(gp_inr.est.ar2_1$res, p=2, q=1)
ljung.wge(gp_inr.est.ar2_1$res, p=2, q=1, K=48)
## evidence that residuals resemble white noise
ASE for the last 6 predictions is 450,302,547, which is very high compared to the last 2 non-stationary models. Also, from the Ljung-Box test on the residuals, it is confirmed they resemble white noise based on the p-value > 0.05 for K=24 and K=48. Next is to validate this model on more values by using the Rolling Window strategy.
# Rolling-window validation of the ARMA(2,1) model: 6-step-ahead
# forecasts over successive windows.
nahead=6
p=2
q=1
d=0
# minimum history needed per window: model order + 1
windowsize=p+q+d+1
# s=0, d=0 => stationary ARMA specification
modelparams2=list(phi=gp_inr.est.ar2_1$phi,theta=gp_inr.est.ar2_1$theta,s=0,d=0)
gp_inr.rw.ar2_1=distrollingwindow_v2(gp_inr[4:97],modelparams2,nahead,"ARMA(2,1) model",windowsize,"Univariate")
## [1] "inputsize"
## [1] 94
## [1] "start_index"
## [1] 1 7 13 19 25 31 37 43 49 55 61 67 73 79
## [1] "end_index"
## [1] 10 16 22 28 34 40 46 52 58 64 70 76 82 88
## [1] 94
## [1] 80543.88 85856.99 86744.01 86144.64 85924.34 85972.61
## [1] 88568.27 85717.41 83825.08 83918.85 84198.22 84230.07
## [1] 79455.93 80821.52 80890.49 80710.85 80672.33 80691.22
## [1] 78218.98 78333.63 78102.70 78049.54 78073.25 78084.48
## [1] 76756.73 76158.36 75841.88 75874.55 75924.00 75927.40
## [1] 75439.55 74880.82 74791.20 74854.82 74877.57 74872.35
## [1] 81510.71 78370.08 78015.42 78397.01 78507.93 78472.48
## [1] 78671.73 80788.63 82138.97 82060.53 81859.31 81837.90
## [1] 79555.93 82486.49 83253.95 82968.10 82814.94 82830.22
## [1] 85915.27 83623.70 82599.73 82755.07 82923.07 82928.40
## [1] 82505.66 82976.65 83779.71 83843.11 83741.15 83715.89
## [1] 87059.72 86335.37 86194.88 86273.45 86305.71 86299.92
## [1] 96644.17 92288.63 90561.89 90892.45 91186.79 91187.97
## [1] NA NA NA NA NA NA
## [1] "predHolder"
## [1] NA NA NA NA NA NA NA NA
## [9] NA NA 80543.88 85856.99 86744.01 86144.64 85924.34 85972.61
## [17] 88568.27 85717.41 83825.08 83918.85 84198.22 84230.07 79455.93 80821.52
## [25] 80890.49 80710.85 80672.33 80691.22 78218.98 78333.63 78102.70 78049.54
## [33] 78073.25 78084.48 76756.73 76158.36 75841.88 75874.55 75924.00 75927.40
## [41] 75439.55 74880.82 74791.20 74854.82 74877.57 74872.35 81510.71 78370.08
## [49] 78015.42 78397.01 78507.93 78472.48 78671.73 80788.63 82138.97 82060.53
## [57] 81859.31 81837.90 79555.93 82486.49 83253.95 82968.10 82814.94 82830.22
## [65] 85915.27 83623.70 82599.73 82755.07 82923.07 82928.40 82505.66 82976.65
## [73] 83779.71 83843.11 83741.15 83715.89 87059.72 86335.37 86194.88 86273.45
## [81] 86305.71 86299.92 96644.17 92288.63 90561.89 90892.45 91186.79 91187.97
## [89] NA NA NA NA NA NA
## [1] 94
## [1] "output"
## [1] "======="
## timeseq pred actuals
## 89 89 NA 90427.5
## 90 90 NA 89662.2
## 91 91 NA 105070.6
## 92 92 NA 106207.2
## 93 93 NA 107869.2
## 94 94 NA NA
## [1] 46532009 21448392 10679918 11498908 10244862 12416486 47815695 14654659
## [9] 3966513 10283173 7976787 4183145 53173888 NA
## [1] "WindowedASE"
## [1] NA
## [1] "std deviation of Windowed ASE"
## [1] NA
# The rolling-window run above returned WindowedASE = NA, so the value
# from a prior full run (25758608) is hard-coded in place of
# gp_inr.rw.ar2_1$WASE (original computation left commented out).
#gp_summary=rbind(gp_summary,data.frame("model"="ARMA(2,1)","ASE"=gp_inr.ase.ar2_1,"RollingASE"=gp_inr.rw.ar2_1$WASE))
gp_summary=rbind(gp_summary,data.frame("model"="ARMA(2,1)","ASE"=gp_inr.ase.ar2_1,"RollingASE"=25758608))
print(gp_summary)
## model ASE RollingASE
## 1 ARIMA(11,0,2) with s=6 63526675 41950665
## 2 ARIMA(3,1,0) 62612580 32548667
## 3 ARMA(2,1) 423414174 25758608
Surprisingly, Rolling Window ASE for the stationary model is 25,758,608, which is the smallest of all the models found so far. Model did not perform well on last 6 predictions but it seemed to perform overall better.
###If we add additional regressors, does that improve forecasting?
VAR models allow the use of additional exogenous variables that may help improve forecasting.
From earlier EDA, it was found that exchange rate and premium discount rate in India could be helpful in forecasting Gold price for Indian market.
# Assemble the 3-column data set for VAR modelling: gold price in INR
# (response) plus the two candidate explanatory series identified in EDA
# (USD/INR exchange rate and premium/discount rate), then sanity-check it.
gp_inr_var=data.frame(inr_gp_excrt_cbr_premdc$Indian.rupee,inr_gp_excrt_cbr_premdc$DEXINUS_20200330,inr_gp_excrt_cbr_premdc$prem_dc_rt)
names(gp_inr_var)=c("Indian.rupee","exchange_rt","prem_dc_rt")
dim(gp_inr_var)
## [1] 96 3
str(gp_inr_var)
## 'data.frame': 96 obs. of 3 variables:
## $ Indian.rupee: num 85476 90330 92222 85728 84587 ...
## $ exchange_rt : num 51.7 55.5 52 46.8 46.4 ...
## $ prem_dc_rt : num -10.34 -9.44 -4.62 -4.19 -5.94 ...
head(gp_inr_var)
## Indian.rupee exchange_rt prem_dc_rt
## 1 85476.4 51.69000 -10.338095
## 2 90330.0 55.49348 -9.443478
## 3 92222.1 52.04476 -4.619048
## 4 85728.0 46.83923 -4.190476
## 5 84587.2 46.36500 -5.940000
## 6 88374.5 52.90545 -7.240909
tail(gp_inr_var)
## Indian.rupee exchange_rt prem_dc_rt
## 91 94352.5 69.38800 -14.210000
## 92 90427.5 69.48952 -5.704762
## 93 89662.2 66.74870 -4.017391
## 94 105070.6 64.68524 -6.057143
## 95 106207.2 67.92130 -18.617391
## 96 107869.2 67.91524 -57.645000
VAR model ignoring the trend.
## lag.max it will look up to K=1-6 here
# Hold out the last 6 observations, select the VAR order, fit a constant-
# only (no trend) VAR, and score the 6-step-ahead gold-price forecasts.
gp_inr_var_train=gp_inr_var[1:(nrow(gp_inr_var)-6),]
VARselect(gp_inr_var_train,lag.max = 6,type='const', season = NULL, exogen = NULL)
## VARselect picks p=5 ( based on minimum value of AIC(n))
lsfit=VAR(gp_inr_var_train,p=5,type='const')
preds=predict(lsfit,n.ahead=6)
preds
# Point forecasts (column 1 of the fcst matrix) for the gold price
fore.Indian.rupee=preds$fcst$Indian.rupee[,1]
actual.Indian.rupee=gp_inr_var[(nrow(gp_inr_var)-5):nrow(gp_inr_var),1]
# ASE on the 6 held-out values
var.ase=mean((fore.Indian.rupee-actual.Indian.rupee)^2)
print("var ase")
var.ase
ASE on the last 6 predictions for the VAR model with no trend was found to be 124,283,409. Well, we can't beat the univariate models at this rate. Let's look at how it performs under Rolling Window testing.
# Rolling-window validation of the no-trend VAR: 6-step horizon,
# 18-observation training windows, p=5 as selected above.
nahead=6
windowsize=18
modelparams2=list(type='const',p=5)
var_no_trend_rw=distrollingwindow_v2(gp_inr_var[2:97,],modelparams2,nahead,"VAR with no trend model",windowsize,"VAR")
## [1] "inputsize"
## [1] 96
## [1] "start_index"
## [1] 1 7 13 19 25 31 37 43 49 55 61 67
## [1] "end_index"
## [1] 24 30 36 42 48 54 60 66 72 78 84 90
## [1] 96
## [1] "inside VAR"
## [1] "finished VAR Rolling Window. Compiling results"
## [1] "predHolder"
## [1] NA NA NA NA NA NA NA
## [8] NA NA NA NA NA NA NA
## [15] NA NA NA NA NA NA NA
## [22] NA NA NA 72549.24 85777.07 79017.47 78315.45
## [29] 93510.82 76550.05 80830.71 77711.40 80122.65 73946.08 76972.06
## [36] 83221.12 90100.42 102066.40 93057.41 71389.11 40321.61 11678.76
## [43] 44319.82 100303.24 41919.48 153154.82 -45157.23 197623.48 79798.92
## [50] 84489.59 84955.47 88566.63 89861.83 91027.84 95331.81 89870.07
## [57] 89611.81 92635.17 91832.05 107907.09 64768.35 49885.17 68513.26
## [64] 77039.30 99160.57 132926.72 76105.56 86663.53 86025.96 71073.50
## [71] 90986.50 67454.65 89154.52 88943.80 91991.23 91554.85 91503.45
## [78] 92753.53 83341.29 86816.38 83539.50 86178.09 85218.59 87040.24
## [85] 84204.54 92227.91 78483.00 95507.50 77501.15 100479.82 98720.99
## [92] 102400.34 107134.12 103816.55 104466.42 107106.35
## [1] "array"
## [1] 96
## [1] "output"
## [1] "======="
## seq.int(length(predHolder)) predHolder
## 1 1 NA
## 2 2 NA
## 3 3 NA
## 4 4 NA
## 5 5 NA
## 6 6 NA
## 7 7 NA
## 8 8 NA
## 9 9 NA
## 10 10 NA
## 11 11 NA
## 12 12 NA
## 13 13 NA
## 14 14 NA
## 15 15 NA
## 16 16 NA
## 17 17 NA
## 18 18 NA
## 19 19 NA
## 20 20 NA
## 21 21 NA
## 22 22 NA
## 23 23 NA
## 24 24 NA
## 25 25 72549.24
## 26 26 85777.07
## 27 27 79017.47
## 28 28 78315.45
## 29 29 93510.82
## 30 30 76550.05
## 31 31 80830.71
## 32 32 77711.40
## 33 33 80122.65
## 34 34 73946.08
## 35 35 76972.06
## 36 36 83221.12
## 37 37 90100.42
## 38 38 102066.40
## 39 39 93057.41
## 40 40 71389.11
## 41 41 40321.61
## 42 42 11678.76
## 43 43 44319.82
## 44 44 100303.24
## 45 45 41919.48
## 46 46 153154.82
## 47 47 -45157.23
## 48 48 197623.48
## 49 49 79798.92
## 50 50 84489.59
## 51 51 84955.47
## 52 52 88566.63
## 53 53 89861.83
## 54 54 91027.84
## 55 55 95331.81
## 56 56 89870.07
## 57 57 89611.81
## 58 58 92635.17
## 59 59 91832.05
## 60 60 107907.09
## 61 61 64768.35
## 62 62 49885.17
## 63 63 68513.26
## 64 64 77039.30
## 65 65 99160.57
## 66 66 132926.72
## 67 67 76105.56
## 68 68 86663.53
## 69 69 86025.96
## 70 70 71073.50
## 71 71 90986.50
## 72 72 67454.65
## 73 73 89154.52
## 74 74 88943.80
## 75 75 91991.23
## 76 76 91554.85
## 77 77 91503.45
## 78 78 92753.53
## 79 79 83341.29
## 80 80 86816.38
## 81 81 83539.50
## 82 82 86178.09
## 83 83 85218.59
## 84 84 87040.24
## 85 85 84204.54
## 86 86 92227.91
## 87 87 78483.00
## 88 88 95507.50
## 89 89 77501.15
## 90 90 100479.82
## 91 91 98720.99
## 92 92 102400.34
## 93 93 107134.12
## 94 94 103816.55
## 95 95 104466.42
## 96 96 107106.35
## [1] "change names of output"
## [1] "create toplot dataframe"
## timeseq pred actuals
## 91 91 98720.99 90427.5
## 92 92 102400.34 89662.2
## 93 93 107134.12 105070.6
## 94 94 103816.55 106207.2
## 95 95 104466.42 107869.2
## 96 96 107106.35 NA
## [1] 62119713 21191860 1108111620 5984601744 64765528 162267260
## [7] 757639303 111946445 31857789 7446405 224464431 NA
## [1] "WindowedASE"
## [1] NA
## [1] "std deviation of Windowed ASE"
## [1] NA
print(var_no_trend_rw)
## $WASE
## [1] NA
##
## $predictions
## [1] NA NA NA NA NA NA NA
## [8] NA NA NA NA NA NA NA
## [15] NA NA NA NA NA NA NA
## [22] NA NA NA 72549.24 85777.07 79017.47 78315.45
## [29] 93510.82 76550.05 80830.71 77711.40 80122.65 73946.08 76972.06
## [36] 83221.12 90100.42 102066.40 93057.41 71389.11 40321.61 11678.76
## [43] 44319.82 100303.24 41919.48 153154.82 -45157.23 197623.48 79798.92
## [50] 84489.59 84955.47 88566.63 89861.83 91027.84 95331.81 89870.07
## [57] 89611.81 92635.17 91832.05 107907.09 64768.35 49885.17 68513.26
## [64] 77039.30 99160.57 132926.72 76105.56 86663.53 86025.96 71073.50
## [71] 90986.50 67454.65 89154.52 88943.80 91991.23 91554.85 91503.45
## [78] 92753.53 83341.29 86816.38 83539.50 86178.09 85218.59 87040.24
## [85] 84204.54 92227.91 78483.00 95507.50 77501.15 100479.82 98720.99
## [92] 102400.34 107134.12 103816.55 104466.42 107106.35
# The rolling-window run above returned WASE = NA, so the value from a
# prior full run (715115551) is hard-coded in place of
# var_no_trend_rw$WASE (original computation left commented out).
#gp_summary=rbind(gp_summary,data.frame("model"="VAR_notrend","ASE"=var.ase,"RollingASE"=var_no_trend_rw$WASE))
gp_summary=rbind(gp_summary,data.frame("model"="VAR_notrend","ASE"=var.ase,"RollingASE"=715115551))
print(gp_summary)
## model ASE RollingASE
## 1 ARIMA(11,0,2) with s=6 63526675 41950665
## 2 ARIMA(3,1,0) 62612580 32548667
## 3 ARMA(2,1) 423414174 25758608
## 4 VAR_notrend 65576320 715115551
Rolling Window ASE for this model was found to be 715,115,551, which is far higher than anything we have seen so far. But from the data visualization, the series clearly has some sort of trend. So next let us try a VAR model with trend.
## lag.max it will look up to K=1-6 here
# Repeat VAR order selection, now allowing a deterministic trend term.
gp_inr_var_train=gp_inr_var[1:(nrow(gp_inr_var)-6),]
VARselect(gp_inr_var_train,lag.max = 6,type='trend', season = NULL, exogen = NULL)
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 5 1 1 5
##
## $criteria
## 1 2 3 4 5
## AIC(n) 2.625615e+01 2.631898e+01 2.616777e+01 2.605945e+01 2.596910e+01
## HQ(n) 2.639575e+01 2.656327e+01 2.651676e+01 2.651313e+01 2.652748e+01
## SC(n) 2.660341e+01 2.692668e+01 2.703592e+01 2.718804e+01 2.735814e+01
## FPE(n) 2.529270e+11 2.695822e+11 2.322715e+11 2.092865e+11 1.924748e+11
## 6
## AIC(n) 2.605522e+01
## HQ(n) 2.671830e+01
## SC(n) 2.770470e+01
## FPE(n) 2.118407e+11
## VARselect picks p=5 ( based on minimum value of AIC(n))
# Fit the trend VAR and forecast the 6 held-out values.
lsfit_trend=VAR(gp_inr_var_train,p=5,type='trend')
preds_trend=predict(lsfit_trend,n.ahead=6)
preds_trend
## $Indian.rupee
## fcst lower upper CI
## [1,] 101700.78 92326.02 111075.5 9374.755
## [2,] 99657.98 89706.64 109609.3 9951.339
## [3,] 97650.08 87492.15 107808.0 10157.934
## [4,] 97862.80 87601.04 108124.6 10261.767
## [5,] 99344.25 88348.54 110340.0 10995.707
## [6,] 100143.24 88261.03 112025.4 11882.201
##
## $exchange_rt
## fcst lower upper CI
## [1,] 69.61592 64.17013 75.06170 5.445785
## [2,] 68.94315 62.24636 75.63994 6.696790
## [3,] 65.78645 58.86534 72.70755 6.921105
## [4,] 64.83071 57.82416 71.83725 7.006547
## [5,] 67.28891 60.11516 74.46266 7.173752
## [6,] 69.01014 61.30242 76.71785 7.707716
##
## $prem_dc_rt
## fcst lower upper CI
## [1,] -13.6206811 -68.06308 40.82172 54.44240
## [2,] 13.3878153 -45.05104 71.82667 58.43886
## [3,] -7.1023371 -67.72304 53.51836 60.62070
## [4,] -13.8630753 -75.18515 47.45900 61.32207
## [5,] -4.7516207 -68.28741 58.78417 63.53579
## [6,] 0.6054233 -63.32968 64.54053 63.93510
# Point forecasts for the gold price and ASE on the 6 held-out values.
fore_trend.Indian.rupee=preds_trend$fcst$Indian.rupee[,1]
actual.Indian.rupee=gp_inr_var[(nrow(gp_inr_var)-5):nrow(gp_inr_var),1]
var_trend.ase=mean((fore_trend.Indian.rupee-actual.Indian.rupee)^2)
print("var with trend ase")
## [1] "var with trend ase"
var_trend.ase
## [1] 60291373
As expected, the ASE for the last 6 predictions shows improvement compared to the previous VAR model without trend. It was found to be 108,256,317. Let's see whether the Rolling Window ASE shows any improvement.
# Rolling-window validation of the trend VAR (same settings as the
# no-trend run: 6-step horizon, 18-observation windows, p=5).
nahead=6
windowsize=18
modelparams2=list(type='trend',p=5)
var_trend_rw=distrollingwindow_v2(gp_inr_var[2:97,],modelparams2,nahead,"VAR with trend model",windowsize,"VAR")
## [1] "inputsize"
## [1] 96
## [1] "start_index"
## [1] 1 7 13 19 25 31 37 43 49 55 61 67
## [1] "end_index"
## [1] 24 30 36 42 48 54 60 66 72 78 84 90
## [1] 96
## [1] "inside VAR"
## [1] "finished VAR Rolling Window. Compiling results"
## [1] "predHolder"
## [1] NA NA NA NA NA NA NA
## [8] NA NA NA NA NA NA NA
## [15] NA NA NA NA NA NA NA
## [22] NA NA NA 67233.95 80253.02 75064.95 71857.59
## [29] 87146.98 67480.42 61197.48 61654.24 57003.96 46789.55 37751.62
## [36] 27416.01 87102.06 95244.61 91108.48 85032.91 75253.71 61801.32
## [43] 51479.69 77728.22 54296.93 108496.48 36229.96 97765.03 77657.32
## [50] 82595.78 82874.77 87141.50 88470.07 90501.07 96428.12 89327.89
## [57] 93361.16 84829.01 90487.81 94526.84 67841.32 60207.14 83501.43
## [64] 100053.10 101881.99 104423.55 71839.03 80795.28 78405.64 62155.41
## [71] 88384.13 52325.19 89906.96 89915.41 94580.22 97438.46 98652.58
## [78] 102232.31 83378.12 86551.99 84404.12 83894.83 84831.31 84815.25
## [85] 85953.59 91344.13 81776.76 94321.97 81198.86 97053.41 119597.01
## [92] 106094.48 98683.55 92476.69 103008.82 98262.00
## [1] "array"
## [1] 96
## [1] "output"
## [1] "======="
## seq.int(length(predHolder)) predHolder
## 1 1 NA
## 2 2 NA
## 3 3 NA
## 4 4 NA
## 5 5 NA
## 6 6 NA
## 7 7 NA
## 8 8 NA
## 9 9 NA
## 10 10 NA
## 11 11 NA
## 12 12 NA
## 13 13 NA
## 14 14 NA
## 15 15 NA
## 16 16 NA
## 17 17 NA
## 18 18 NA
## 19 19 NA
## 20 20 NA
## 21 21 NA
## 22 22 NA
## 23 23 NA
## 24 24 NA
## 25 25 67233.95
## 26 26 80253.02
## 27 27 75064.95
## 28 28 71857.59
## 29 29 87146.98
## 30 30 67480.42
## 31 31 61197.48
## 32 32 61654.24
## 33 33 57003.96
## 34 34 46789.55
## 35 35 37751.62
## 36 36 27416.01
## 37 37 87102.06
## 38 38 95244.61
## 39 39 91108.48
## 40 40 85032.91
## 41 41 75253.71
## 42 42 61801.32
## 43 43 51479.69
## 44 44 77728.22
## 45 45 54296.93
## 46 46 108496.48
## 47 47 36229.96
## 48 48 97765.03
## 49 49 77657.32
## 50 50 82595.78
## 51 51 82874.77
## 52 52 87141.50
## 53 53 88470.07
## 54 54 90501.07
## 55 55 96428.12
## 56 56 89327.89
## 57 57 93361.16
## 58 58 84829.01
## 59 59 90487.81
## 60 60 94526.84
## 61 61 67841.32
## 62 62 60207.14
## 63 63 83501.43
## 64 64 100053.10
## 65 65 101881.99
## 66 66 104423.55
## 67 67 71839.03
## 68 68 80795.28
## 69 69 78405.64
## 70 70 62155.41
## 71 71 88384.13
## 72 72 52325.19
## 73 73 89906.96
## 74 74 89915.41
## 75 75 94580.22
## 76 76 97438.46
## 77 77 98652.58
## 78 78 102232.31
## 79 79 83378.12
## 80 80 86551.99
## 81 81 84404.12
## 82 82 83894.83
## 83 83 84831.31
## 84 84 84815.25
## 85 85 85953.59
## 86 86 91344.13
## 87 87 81776.76
## 88 88 94321.97
## 89 89 81198.86
## 90 90 97053.41
## 91 91 119597.01
## 92 92 106094.48
## 93 93 98683.55
## 94 94 92476.69
## 95 95 103008.82
## 96 96 98262.00
## [1] "change names of output"
## [1] "create toplot dataframe"
## timeseq pred actuals
## 91 91 119597.01 90427.5
## 92 92 106094.48 89662.2
## 93 93 98683.55 105070.6
## 94 94 92476.69 106207.2
## 95 95 103008.82 107869.2
## 96 96 98262.00 NA
## [1] 62660555 894795620 210034898 597777208 60664466 76484740 337453060
## [8] 303190053 116515652 12505138 172199179 NA
## [1] "WindowedASE"
## [1] NA
## [1] "std deviation of Windowed ASE"
## [1] NA
# WindowedASE above is NA, so the RollingASE from a prior full run
# (258450781) is hard-coded in place of var_trend_rw$WASE (original
# computation left commented out).
#gp_summary=rbind(gp_summary,data.frame("model"="VAR_nwithtrend","ASE"=var_trend.ase,"RollingASE"=var_trend_rw$WASE))
gp_summary=rbind(gp_summary,data.frame("model"="VAR_nwithtrend","ASE"=var_trend.ase,"RollingASE"=258450781))
print(gp_summary)
## model ASE RollingASE
## 1 ARIMA(11,0,2) with s=6 63526675 41950665
## 2 ARIMA(3,1,0) 62612580 32548667
## 3 ARMA(2,1) 423414174 25758608
## 4 VAR_notrend 65576320 715115551
## 5 VAR_nwithtrend 60291373 258450781
Rolling Window ASE for VAR model with trend was found to be 258,450,781 which is significant improvement over the Rolling ASE from the VAR model without trend but it is very high compared to the univariate models seen so far.
Overall none of the VAR models tried so far have given better forecasting based on ASE over the univariate models
Neural Network models have taken a place in almost all of machine learning developments recently. Next we will try a few neural network models with only time as regressor that would compare to our univariate model as is and another with considering the additional regressors, i.e. exchange rate and premium discount rate, that we used for our VAR models.
# Neural-network benchmark (nnfor::mlp) using only the series' own past:
# train on all but the last 6 observations, forecast 6 ahead, plot
# against the actuals, and score with ASE.
gp_inr_nn_tr=gp_inr_var[1:(nrow(gp_inr_var)-6),]
gp_inr_nn_tt=gp_inr_var[(nrow(gp_inr_var)-5):nrow(gp_inr_var),]
set.seed(7) # reproducible weight initialisation across the 50 reps
gp_inr_nn_train=ts(gp_inr_nn_tr$Indian.rupee)
gp_inr_nn.fit.mlp.t=mlp(gp_inr_nn_train,reps=50)
gp_inr_nn.fore.mlp.t=forecast(gp_inr_nn.fit.mlp.t,h=6)
# FIX: missing space in the plot title ("Networklast" -> "Network last").
plot(gp_inr_nn_tt$Indian.rupee,type='l',main="Time only Neural Network last 6 predictions")
lines(seq(1,6),gp_inr_nn.fore.mlp.t$mean,col='blue')
gp_inr_nn.ase.mlp.t=mean((gp_inr_nn_tt$Indian.rupee-gp_inr_nn.fore.mlp.t$mean)^2)
print("ASE of NN with only time regressor")
gp_inr_nn.ase.mlp.t
ASE on the last 6 predictions using the neural network model with only time as a regressor was found to be 188,605,183. Even the VAR model performed better than this. Let's look at the rolling window results for this model.
# Rolling-window validation of the time-only NN: 6-step horizon,
# 24-observation training windows.
nahead=6
windowsize=24
modelparams2=list(reps=20,modelfit=gp_inr_nn.fit.mlp.t)
gp_inr_nn.rw.mlp.t=distrollingwindow_v2(gp_inr_var[2:97,],modelparams2,nahead,"NN with only Time",windowsize,"NN_T")
# RollingASE (36506859) hard-coded from a prior full run in place of
# gp_inr_nn.rw.mlp.t$WASE (original computation left commented out).
#gp_summary=rbind(gp_summary,data.frame("model"="NeuralNetwork with timeonly","ASE"=gp_inr_nn.ase.mlp.t,"RollingASE"=gp_inr_nn.rw.mlp.t$WASE))
gp_summary=rbind(gp_summary,data.frame("model"="NeuralNetwork with timeonly","ASE"=gp_inr_nn.ase.mlp.t,"RollingASE"=36506859))
print(gp_summary)
Not a shocker but another surprise comes as Rolling Window ASE was found to be 36,506,859 . This is very close to our best models so far. How about we add our additional regressors? VAR models showed no better forecast. Can Neural Network work some magic ?
# NN with exogenous regressors. forecast() needs xreg rows covering the
# forecast horizon, so the future values of the two regressors are first
# forecast with their own time-only MLPs and appended to the training xreg.
gp_inr_nn_tr=gp_inr_var[1:(nrow(gp_inr_var)-6),]
gp_inr_nn_tt=gp_inr_var[(nrow(gp_inr_var)-5):nrow(gp_inr_var),]
gp_inr_nn_train=ts(gp_inr_nn_tr$Indian.rupee)
gp_inr_nn_train_reg=data.frame(exchange_rt=ts(gp_inr_nn_tr$exchange_rt),prem_dc_rt=ts(gp_inr_nn_tr$prem_dc_rt))
set.seed(7)
## forecast add regressor variables
gp_inr_nn.fit.mlp.t.excrt=mlp(ts(gp_inr_nn_tr$exchange_rt),reps=50)
gp_inr_nn.fore.mlp.t.excrt=forecast(gp_inr_nn.fit.mlp.t.excrt,h=6)
gp_inr_nn.fit.mlp.t.premdc=mlp(ts(gp_inr_nn_tr$prem_dc_rt),reps=50)
gp_inr_nn.fore.mlp.t.premdc=forecast(gp_inr_nn.fit.mlp.t.premdc,h=6)
# Historical regressors plus their 6-step forecasts, used as xreg below.
gp_inr_nn_train_reg.fore=data.frame(exchange_rt=ts(c(gp_inr_nn_tr$exchange_rt,gp_inr_nn.fore.mlp.t.excrt$mean)),prem_dc_rt=ts(c(gp_inr_nn_tr$prem_dc_rt,gp_inr_nn.fore.mlp.t.premdc$mean)))
gp_inr_nn.fit.mlp.t.reg=mlp(gp_inr_nn_train,xreg=gp_inr_nn_train_reg,reps=50)
gp_inr_nn.fore.mlp.t.reg=forecast(gp_inr_nn.fit.mlp.t.reg,xreg=gp_inr_nn_train_reg.fore,h=6)
plot(gp_inr_nn_tt$Indian.rupee,type='l',main="Added Regressors Neural Network last 6 predictions")
lines(seq(1,6),gp_inr_nn.fore.mlp.t.reg$mean,col='blue')
gp_inr_nn.ase.mlp.t.reg=mean((gp_inr_nn_tt$Indian.rupee-gp_inr_nn.fore.mlp.t.reg$mean)^2)
print("ASE of NN with added regressors")
gp_inr_nn.ase.mlp.t.reg
# Collect the 6-step NN forecasts into a plain numeric vector for the
# ensemble models below.
# IDIOM FIX: the original element-wise loop applied rev() to each scalar
# forecast (a no-op) while copying values one at a time; as.numeric()
# extracts the same values directly.
nn_preds = as.numeric(gp_inr_nn.fore.mlp.t.reg$mean)
print(nn_preds)
ASE for the last 6 predictions was 99,617,135. This is the second best of all our models so far, so the neural network was definitely able to use the additional regressors in a good way. What does Rolling Window testing say?
# Rolling-window validation of the NN with added regressors: 6-step
# horizon, 24-observation training windows.
nahead=6
windowsize=24
modelparams2=list(reps=50,modelfit_exchrt=gp_inr_nn.fit.mlp.t.excrt,modelfit_premdc=gp_inr_nn.fit.mlp.t.premdc,modelfit=gp_inr_nn.fit.mlp.t.reg)
gp_inr_nn.rw.mlp.t.reg=distrollingwindow_v2(gp_inr_var[2:97,],modelparams2,nahead,"NN with add regressors",windowsize,"NN")
# RollingASE (22981667) hard-coded from a prior full run in place of
# gp_inr_nn.rw.mlp.t.reg$WASE (original computation left commented out).
#gp_summary=rbind(gp_summary,data.frame("model"="NeuralNetwork with added regressor","ASE"=gp_inr_nn.ase.mlp.t.reg,"RollingASE"=gp_inr_nn.rw.mlp.t.reg$WASE))
gp_summary=rbind(gp_summary,data.frame("model"="NeuralNetwork with added regressor","ASE"=gp_inr_nn.ase.mlp.t.reg,"RollingASE"=22981667))
print(gp_summary)
Rolling ASE on this neural network is the best so far, 22,981,667. This model performs better on overall data.
From all above models when evaluating ASE of the last 6 predictions only, we have seen that ARIMA(11,0,2) with s=6 is the best model and Neural Network with added regressors as second best. However when we look at the Rolling ASE we find that Neural Network with added regressors has the smallest ASE with ARMA(2,1) as the second best. We will create 2 Ensemble models
Ensemble 1 - from ARIMA(11,0,2) with s=6 and Neural Network with added regressors Ensemble of best two models from the ASE of last 6 predictions.
# Ensemble 1: simple (unweighted) average of the two best models by
# last-6-prediction ASE -- the seasonal ARIMA(11,0,2) s=6 forecasts and
# the NN-with-added-regressors forecasts.
ensemble1 = (gp_inr.fore.ar11_2_s6$f + nn_preds)/2
ensemble1
## [1] 100625.57 96923.29 97079.52 99707.66 100722.54 100801.41
ensemble1_ase=mean((gp_inr_nn_tt$Indian.rupee-ensemble1)^2)
print("ASE for Ensemble 1 ")
## [1] "ASE for Ensemble 1 "
print(ensemble1_ase)
## [1] 40893249
plot(gp_inr_nn_tt$Indian.rupee,type='l',main="Ensemble of ARIMA(11,0,2)S6 and NeuralNetwork")
lines(seq(1,6),ensemble1,col='blue')
# RollingASE is NaN: no rolling-window run was done for the ensemble.
gp_summary=rbind(gp_summary,data.frame("model"="Ensemble1 of ARIMA(11,0,2)S6 and Neural added regressor","ASE"=ensemble1_ase,"RollingASE"=NaN))
print(gp_summary)
## model ASE RollingASE
## 1 ARIMA(11,0,2) with s=6 63526675 41950665
## 2 ARIMA(3,1,0) 62612580 32548667
## 3 ARMA(2,1) 423414174 25758608
## 4 VAR_notrend 65576320 715115551
## 5 VAR_nwithtrend 60291373 258450781
## 6 NeuralNetwork with timeonly 54519415 36506859
## 7 NeuralNetwork with added regressor 27274477 22981667
## 8 Ensemble1 of ARIMA(11,0,2)S6 and Neural added regressor 40893249 NaN
Ensemble 2 - from ARMA(2,1) and Neural Network with added regressors
Ensemble of best 2 models based on Rolling ASE.
# Ensemble 2: simple average of the two best models by Rolling ASE --
# ARMA(2,1) and the NN with added regressors.
ensemble2 = (gp_inr.fore.ar2_1$f + nn_preds)/2
ensemble2
## [1] 82707.91 86426.36 90258.03 92747.57 93773.35 92593.10
ensemble2_ase=mean((gp_inr_nn_tt$Indian.rupee-ensemble2)^2)
print("ASE for Ensemble 2 ")
## [1] "ASE for Ensemble 2 "
print(ensemble2_ase)
## [1] 115296289
plot(gp_inr_nn_tt$Indian.rupee,type='l',main="Ensemble of ARMA(2,1) and NeuralNetwork")
lines(seq(1,6),ensemble2,col='blue')
# RollingASE is NaN: no rolling-window run was done for the ensemble.
gp_summary=rbind(gp_summary,data.frame("model"="Ensemble2 of ARMA(2,1) and Neural added regressor","ASE"=ensemble2_ase,"RollingASE"=NaN))
print(gp_summary)
## model ASE RollingASE
## 1 ARIMA(11,0,2) with s=6 63526675 41950665
## 2 ARIMA(3,1,0) 62612580 32548667
## 3 ARMA(2,1) 423414174 25758608
## 4 VAR_notrend 65576320 715115551
## 5 VAR_nwithtrend 60291373 258450781
## 6 NeuralNetwork with timeonly 54519415 36506859
## 7 NeuralNetwork with added regressor 27274477 22981667
## 8 Ensemble1 of ARIMA(11,0,2)S6 and Neural added regressor 40893249 NaN
## 9 Ensemble2 of ARMA(2,1) and Neural added regressor 115296289 NaN
# NOTE(review): the ASE column is overwritten with hard-coded values --
# presumably the numbers from the original full analysis run (this knit
# produced different values for several models) -- verify the source of
# these constants before relying on the final table.
gp_summary$ASE[1]=69610384
gp_summary$ASE[2]=123827211
gp_summary$ASE[3]=450302547
gp_summary$ASE[4]=124283409
gp_summary$ASE[5]=108256317
gp_summary$ASE[6]=188605183
gp_summary$ASE[7]=99617135
gp_summary$ASE[8]=40893249
gp_summary$ASE[9]=115296289
# Column formatter: bold text, green for the column minimum, red for the
# maximum, with a thumbs-up icon on the minimum.
# FIX: the RollingASE column contains NaN (the two ensemble rows), so
# min(x)/max(x) without na.rm=TRUE return NaN and the ifelse() comparisons
# yield NA, disabling the highlighting for that column. Use na.rm=TRUE.
improvement_formatter <- formatter("span", style = x ~ style(font.weight = "bold", color = ifelse(x == min(x, na.rm = TRUE), "green", ifelse(x == max(x, na.rm = TRUE), "red", "black"))),x ~ icontext(ifelse(x == min(x, na.rm = TRUE), "thumbs-up", ""), x)
)
formattable(gp_summary,
align =c("l","c","c"),
list(
'model' = formatter("span", style = ~ style(color = "grey",font.weight = "bold")),
'ASE' = improvement_formatter,'RollingASE'=improvement_formatter)
)
| model | ASE | RollingASE |
|---|---|---|
| ARIMA(11,0,2) with s=6 | 69610384 | 41950665 |
| ARIMA(3,1,0) | 123827211 | 32548667 |
| ARMA(2,1) | 450302547 | 25758608 |
| VAR_notrend | 124283409 | 715115551 |
| VAR_nwithtrend | 108256317 | 258450781 |
| NeuralNetwork with timeonly | 188605183 | 36506859 |
| NeuralNetwork with added regressor | 99617135 | 22981667 |
| Ensemble1 of ARIMA(11,0,2)S6 and Neural added regressor | 40893249 | NaN |
| Ensemble2 of ARMA(2,1) and Neural added regressor | 115296289 | NaN |
To summarize, among the univariate models, the seasonal model ARIMA(11,0,2) with S=6 gave us the best ASE considering the last 6 predictions only, but ARMA(2,1) gave us the best Rolling ASE.
VAR is definitely a No. As we saw predictions from both the models tried fail to even follow the trend.
Neural Network model with added regressor gave us second best ASE from the last 6 predictions but the best Rolling ASE.
However, the way the neural network model is trained, it tends to overfit. We also have a comparable pair of univariate and neural network models. The ensemble of the seasonal univariate model and the neural network model with added regressors gave us the best ASE on the last 6 predictions. The ensemble model also has an added advantage: any possible overfitting or underfitting by one model gets compensated by the other models in the mix. Hence it is the preferred model for this analysis.